/***************************************************************************************************************************\
* Date: 24 Feb 2020
*
* Purpose: Final data preparation and analyses
*
\***************************************************************************************************************************/

* Initialize the work environment

capture log close
capture file close _all
clear
clear matrix
clear mata
macro drop _all 
program drop _all
set linesize 125
set matsize 5000
set maxvar 9000
set more off
set scheme sj

adopath + "G:\Data\Workdata\702727\FBOI\ado\"
adopath + "G:\Data\Progs\Stata\"

* Folders

global basedir "G:\Data\Workdata\702727\FBOI"
global resdir "$basedir\Tables"
global logdir "$basedir\Logs"
global figdir "$basedir\Graphs"
global datadir "$basedir\Data"
global progdir "$basedir\Programs"

* Data files

global origdata "nicu_data_290115.dta"
global data_restat "siblings_restat"
global add_educ "track_rev19082019"
global add_labor_force "nicu_ls_rev060919"
global add_mental_health "nicu_data_290115"

global fc_all_data "focalchild"
global sib_data "siblings"
global fc_data "$sib_data"


*****
* OUTCOMES
*****

* Age intervals used for the construction of outcomes and indices

global age_gr_list "05 610 1115"

* Globals for defining the list of indices and the variables going into the creation of each index

global index_list "fc sib mom dad"
global index_list_fc "mortality st_health lt_health disability test_scores educ_beyond"
global index_list_sib "st_health lt_health test_scores educ_beyond"
global index_list_mom "st_labor lt_labor st_income lt_income st_mental_health lt_mental_health"
global index_list_dad "$index_list_mom"

** Construction of indices for focal children
* Mortality 

global fc_mortality "dead28d dead1y"

* Short-term health

global fc_st_health ""
forval i = 1 / 5 {
  global fc_st_health: display "$fc_st_health adm_`i'y"
}

* Long-term health

global fc_lt_health ""
forval i = 6 / 15 {
  global fc_lt_health: display "$fc_lt_health adm_`i'y er_`i'y"
}

* Disability

global fc_disability "cmr_010y cadhd_010y cbe_010y ccp_010y cep_010y"

* Test scores

global fc_test_scores "dansk_m_prove1 mat_s_prove1"

* Higher education

global fc_educ_beyond "enrolled_beyond ac_track_at_18 higher_enrollment24 uni_enrollment24"


** Construction of indices for siblings

foreach idx in $index_list_sib {
  global sib_`idx' ""
  foreach var in ${fc_`idx'} {
    if strpos("`var'", "010y") == 0 global sib_`idx': display "${sib_`idx'} sib_`var'"
  }
}


** Construction of indices for mothers
* Short-term outcomes

global mom_st_labor ""
global mom_st_income ""
global mom_st_mental_health ""
forval i = 1 / 5 {
  global mom_st_labor: display "$mom_st_labor m_intsup`i' m_extsup`i'"
  global mom_st_income: display "$mom_st_income real_m_income`i'"
  if `i' >= 2 global mom_st_mental_health: display "$mom_st_mental_health m_antidep`i'"
}

* Long-term outcomes

global mom_lt_labor ""
global mom_lt_income ""
global mom_lt_mental_health ""
forval i = 6 / 15 {
  global mom_lt_labor: display "$mom_lt_labor m_intsup`i' m_extsup`i'"
  global mom_lt_income: display "$mom_lt_income real_m_income`i'"
  global mom_lt_mental_health: display "$mom_lt_mental_health m_antidep`i'"
}


** Construction of indices for fathers

foreach idx in $index_list_dad {
  global dad_`idx': subinstr global mom_`idx' "m_" "f_", all
}


** Lists of indices
* Indices with significant results

global sig_index_list_fc "mortality lt_health test_scores"
global sig_index_list_sib "test_scores"
global sig_index_list_mom "st_mental_health"
global sig_index_list_dad ""

* Indices with insignificant results

foreach pers in fc sib mom dad {
  local list1: display "${index_list_`pers'}"
  local list2: display "${sig_index_list_`pers'}"
  global insig_index_list_`pers': list local(list1) - local(list2)
}

** Other classifications of outcomes

global fc_all_outcomes: display "dead28d dead1y dansk_m_prove1 mat_s_prove1 " ///
  "ac_track_at_18 enrolled_beyond " ///
  "cadhd_010y cmr_010y cbe_010y cep_010y ccp_010y cadm_615y cer_615y"
global fc_outcomes: display "$fc_all_outcomes"
global sib_outcomes: display "sib_cadm_05y sib_cadm_610y sib_cer_610y " ///
  "sib_dansk_m_prove1 sib_mat_s_prove1 sib_ac_track_at_18 sib_enrolled_beyond " ///
  "sib_uni_enrollment24 sib_higher_enrollment24"
global mom_outcomes: display "cm_intsup* cm_extsup* cm_antidep* ln_creal_m*"
global dad_outcomes: display "cf_intsup* cf_extsup* cf_antidep* ln_creal_f*"


********
* CONTROLS
********

global fc_all_controls: display "male ga i.birth_order multiple mom_age mom_elen_impute m_education_mis married_birth" ///
  " mom_immigrant i.birth_region i.birthyear"
global fc_controls: display "$fc_all_controls"
global sib_controls: display "$fc_all_controls sib_bw sib_male sib_multiple i.sib_birth_order i.sib_birthyear"
global mom_controls: display "$fc_controls"
global dad_controls: subinstr global mom_controls " m_" " f_", all
global dad_controls: subinstr global dad_controls " mom_" " dad_", all


********
* OTHER SETTINGS
********

* Sample restrictions

global startyr = 1982
global endyr = 1993
global yab = 15
global bw = 200
global bw_b = 150
global ll = 1500 - $bw
global ul = 1500 + $bw

global fc_cond "((dead1y ~= .) & (fc_tag == 1))"
global sib_cond "((dead1y ~= .) & (sib_birthyear <= 1997))"
global mom_cond "$fc_cond"
global dad_cond "$mom_cond"


* Aggregation levels for figures

global graph_agg = 25
global graph_bw = $bw


********
* CPI data (2015 = 100)
********

global cpi_1982 = 42.8
global cpi_1983 = 45.7
global cpi_1984 = 48.6
global cpi_1985 = 50.9
global cpi_1986 = 52.8
global cpi_1987 = 54.9
global cpi_1988 = 57.4
global cpi_1989 = 60.1
global cpi_1990 = 61.7
global cpi_1991 = 63.2
global cpi_1992 = 64.5
global cpi_1993 = 65.3
global cpi_1994 = 66.6
global cpi_1995 = 68.0
global cpi_1996 = 69.5
global cpi_1997 = 71.0
global cpi_1998 = 72.3
global cpi_1999 = 74.1
global cpi_2000 = 76.2
global cpi_2001 = 78.0
global cpi_2002 = 79.9
global cpi_2003 = 81.6
global cpi_2004 = 82.5
global cpi_2005 = 84.0
global cpi_2006 = 85.6
global cpi_2007 = 87.1
global cpi_2008 = 90.1
global cpi_2009 = 91.2
global cpi_2010 = 93.3


cd "$progdir"


*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*
*  0. UTILITIES
*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*

**********************
**** A program to produce empty results that can be used to "fill" an estout table
**********************

program define emptycol, eclass

  tempname b V
  
  ereturn clear
      
  matrix `b' = J(1, 1, 0)
  matrix rownames `b' = y1
  matrix colnames `b' = emptycol_coef

  matrix `V' = J(1, 1, 0)
  matrix rownames `V' = emptycol_coef
  matrix colnames `V' = emptycol_coef

  ereturn post `b' `V'
end


**********************
**** A program for reversing the signs of regression coefficients (because rdrobust assumes treatment to the right), as
**** well as saving some additional info that we will report
**********************

program define rdrobust2, eclass
	syntax anything [if] [in] [, c(real 0) covs(string) deriv(real 0) fuzzy(string) p(real 1) q(real 0) h(string) ///
		b(string) rho(real 0) kernel(string) bwselect(string) vce(string) level(real 95) all scalepar(real 1) ///
		scaleregul(real 1) stats]
	
	tempvar wgt esample vlbw f1 f2
	tempname minus_b new_V tau stars md Nobs
	
	tokenize "`anything'"
	local y `1'
	local x `2'	
	marksample touse
	
	quietly {
		if inlist(lower("`kernel'"), "uniform", "uni") generate `wgt' = 1 if `touse'
		else generate `wgt' = 1 - abs((`x' - `c') / `h') if `touse'
		generate `vlbw' = (`x' < `c') if `touse'
		generate `f1' = (`x' - `c') * `vlbw' if `touse'
		generate `f2' = (`x' - `c') * (1 -`vlbw') if `touse'
	}
	if "`covs'" ~= "" {
		fvrevar `covs'
		local initial_controls: display r(varlist)
	}
	else local initial_controls ""
	
	local final_controls ""
	quietly regress `y' `vlbw' `f1' `f2' `initial_controls' [pw = `wgt'] if `touse' & inrange(`x', `c' - `h', `c' + `h')
	foreach control_var in `initial_controls' {
		if _se[`control_var'] ~= 0 local final_controls: di "`final_controls' `control_var'"
	}
	if !inlist(lower("`kernel'"), "uniform", "uni") {
		quietly replace `wgt' = 1 / `h' * cond(`c' < `h', -1, 1)  if `touse' & inlist(`x', `c' - `h', `c' + `h')
		quietly regress `y' `vlbw' `f1' `f2' `initial_controls' [pw = `wgt'] if `touse'
	}
	local N_obs = e(N)
	mark `esample' if e(sample)
	
	if "`final_controls'" == "" local covs_par ""
	else local covs_par "covs(`final_controls')"
	foreach parameter in h b kernel vce {
		if "``parameter''" == "" local `parameter'_par ""
		else local `parameter'_par  "`parameter'(``parameter'')"
	}
	
	rdrobust `y' `x' if `touse', c(`c') `covs_par' p(`p') q(`q') `h_par' `b_par' rho(`rho') `kernel_par' `vce_par'
	
	* Save the bias-corrected estimate in a different matrix
	
	matrix `tau' = -e(tau_bc)
	matrix colnames `tau' = "RD_Estimate"
	matrix rownames `tau' = "y1"
	ereturn matrix tau = `tau'
	
	* Save the conventional estimate as the main estimate
	
	matrix `minus_b' = -_b[RD_Estimate]
	ereturn repost b = `minus_b', esample(`esample')
	
	* Save the robust standard error as the main standard error
	
	matrix `new_V' = e(se_tau_rb)^2
	ereturn repost V = `new_V'
	
	* Save the robust p-value in a separate matrix
	
	matrix `stars' = (e(pv_rb))
	matrix colnames `stars' = "RD_Estimate"
	matrix rownames `stars' = "y1"
	ereturn matrix pv_robust = `stars'
	ereturn scalar N = `N_obs'
	
	* Save the mean of the dependent variable and number of observations in separate matrices (for summary statistics)
	
	if "`stats'" ~= "" {
		summarize `y' if e(sample) & (`x' >= `c'), meanonly
		matrix `md' = (r(mean))
		matrix colnames `md' = "RD_Estimate"
		matrix rownames `md' = "y1"
		ereturn matrix mean_dep = `md'
		
		matrix `Nobs' = (`N_obs')
		matrix colnames `Nobs' = "RD_Estimate"
		matrix rownames `Nobs' = "y1"
		ereturn matrix Nobs = `Nobs'
	}
	
	* Save the bandwidth, the confidence intervals (robust and standard), and the mean of dependent variable as locals
	
	ereturn scalar bw = `h'
	ereturn local conventional_ci = "[" ///
		+ trim(string(_b[RD_Estimate] - invnormal(0.975) * _se[RD_Estimate], "%12.3fc")) + ", " ///
		+ trim(string(_b[RD_Estimate] + invnormal(0.975) * _se[RD_Estimate], "%12.3fc")) + "]"
	ereturn local robust_ci = "[" + trim(string(-e(ci_r_rb), "%12.3fc")) + ", " ///
		+ trim(string(-e(ci_l_rb), "%12.3fc")) + "]"
	summarize `y' if e(sample) & (`x' >= `c'), meanonly
	ereturn scalar mean_right = r(mean)
	
end


**********************
**** A program to generate BKY (2006) sharpened two-stage q-values as described in Anderson (2008)
**** Code adapted from the one provided by Mike Anderson
**********************

program define bky_sharpq
	syntax namelist(max=1 name=pval_matrix), ESTimates(namelist)
  
	clear
	quietly {
	  svmat `pval_matrix', names(pval)
	  count if pval != .
	  local totalpvals = r(N)
	}
	
	* Sort the p-values in ascending order and generate a variable that codes each p-value's rank
	
	quietly {
	  gen int original_sorting_order = _n
	  sort pval
	  gen int rank = _n if pval~=.
	}
	
	* Set the initial counter to 1 
	
	local qval = 1
	
	* Generate the variable that will contain the BKY (2006) sharpened q-values
	
	quietly gen bky06_qval = 1 if pval~=.
	
	* Set up a loop that begins by checking which hypotheses are rejected at q = 1.000, then checks which hypotheses 
	* are rejected at q = 0.999, then checks which hypotheses are rejected at q = 0.998, etc. The loop ends by 
	* checking which hypotheses are rejected at q = 0.001.
	
	while `qval' > 0 {
    * First Stage
    * Generate the adjusted first stage q level we are testing: q' = q / (1 + q)
    
    local qval_adj = `qval' / ( 1 + `qval')
    
    * Generate value q' * r / M
    
    quietly gen fdr_temp1 = `qval_adj' * rank / `totalpvals'
    
    * Generate binary variable checking condition p(r) <= q' * r / M
    
    quietly gen reject_temp1 = (fdr_temp1 >= pval) if pval ~= .
    
    * Generate variable containing p-value ranks for all p-values that meet above condition
    
    quietly gen reject_rank1 = reject_temp1 * rank
    
    * Record the rank of the largest p-value that meets above condition
    
    quietly gegen total_rejected1 = max(reject_rank1)

    * Second Stage
    * Generate the second stage q level that accounts for hypotheses rejected in first stage: q_2st = q' * (M / m0)
    
    local qval_2st = `qval_adj' * (`totalpvals' / (`totalpvals' - total_rejected1[1]))
    
    * Generate value q_2st * r / M
    
    quietly gen fdr_temp2 = `qval_2st' * rank / `totalpvals'
    
    * Generate binary variable checking condition p(r) <= q_2st * r / M
    
    quietly gen reject_temp2 = (fdr_temp2 >= pval) if pval ~= .
    
    * Generate variable containing p-value ranks for all p-values that meet above condition
    
    quietly gen reject_rank2 = reject_temp2 * rank
    
    * Record the rank of the largest p-value that meets above condition
    
    quietly gegen total_rejected2 = max(reject_rank2)

    * A p-value has been rejected at level q if its rank is less than or equal to the rank of the max p-value that 
    * meets the above condition
    
    quietly replace bky06_qval = `qval' if rank <= total_rejected2 & rank ~= .
    
    * Reduce q by 0.001 and repeat loop
    
    quietly drop fdr_temp* reject_temp* reject_rank* total_rejected*
    local qval = `qval' - .001
	}
	
	* Add the calculated q-values to the original results
	
	quietly sort original_sorting_order
	
	local i = 1
	foreach est in `estimates' {
	  quietly {
	    estimates restore `est'
	    matrix bky_q = bky06_qval[`i']
  	  matrix colnames bky_q = "RD_Estimate"
  	  matrix rownames bky_q = "y1"
  	  estadd matrix bky_q = bky_q
  	  local i = `i' + 1
  	}
	}

end


**********************
**** A program for reversing the signs of regression coefficients and formatting the output of rdqtese (RD quantile
**** treatment effects)
**********************

program define rdqtese2, eclass
	syntax varlist(min=1 max=1 numeric) [if] [in] , QUANTILES(numlist) [BSREPS(int -1)] [LEVEL(int 95)]
	
	tempvar norm_bw minus_vlbw
	tempname b V
	
	tokenize `varlist'
	local y `1'
	marksample touse
	
	if "`quantiles'" == "" local quantiles ".25 .5 .75"
	
	generate `norm_bw' = bw - 1500
	generate `minus_vlbw' = (`norm_bw' >= 0)
	
	count if `y' ~= . & `minus_vlbw' ~= . & `touse'
	local N = r(N)
	
	rdqtese `y' `minus_vlbw' if `touse', running(`norm_bw') quantiles(`quantiles') level(`level') bsreps(`bsreps')
	
	local bwlist = e(hmeans)
	local converged = (e(conv0) + e(conv1))
	
	* Save the estimated quantile treatment effects as main estimates and the squares of the estimated standard errors as 
	* the main diagonal of the covariance matrix
	
	matrix `b' = -e(lqte)'
	matrix `V' = diag(e(se)) * diag(e(se))
	
	local nq = rowsof(e(quantiles))
	local rownames ""
	forval i = 1 / `nq' {
		local rownames: di "`rownames' q" el(e(quantiles), `i', 1)
	}
	local rownames: subinstr local rownames "." "", all
	matrix colnames `b' = `rownames'
	matrix rownames `V' = `rownames'
	matrix colnames `V' = `rownames'
	ereturn post `b' `V'
	
	* Save the number of observations, the list of bandwidths, and the mean of dependent variable as locals

	ereturn scalar N = `N'
	ereturn local bw_list = "`bwlist'"
	ereturn local converged = cond(`converged' == 0, "Yes", "No")
	
	* summarize `y' if e(sample) & (`x' >= `c'), meanonly
	* ereturn scalar mean_right = r(mean)
	
end



*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*
*  I. DATA PREPARATION
*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*

**********************
**** Data preparation: import and some basic variable generation
**********************

program define dataprep_initial
	
	set more off
	log using "$logdir\dataprep_initial.log", text replace
	use "$datadir/$origdata", clear
	
	* Selecting relevant sample - born from 1982 (because of missing gestational age)
	
	drop if birthyear < $startyr
	
	* Heaping dummies
	
	gen heap100 = (mod(bw, 100) == 0)
	gen heap50 = (mod(bw, 50) == 0)
	gen heap100_all = trunc(bw / 100) * (mod(bw, 100) == 0)
	gen heap50_all = trunc(bw / 50) * (mod(bw, 50) == 0)
	
	* Generating triangular weights for our badnwidth
	
	gen twgt = 1 - abs((bw - 1500) / $bw)

	* Cutoff and polynomials
	
	gen vlbw = (bw < 1500)
	gen f1 = (bw - 1500) * vlbw
	gen f2 = (bw - 1500) * (1 - vlbw)

	* Primary school graduation
	
	gen primscl = (dansk_m_prove1 != . & mat_s_prove1 != .) if birthyear >= 1987
	
	compress
	save "$datadir/$fc_all_data", replace
	log close

end


**********************
**** Data preparation: add dates of death for children and parents
**********************

program define dataprep_death_dates
	
	set more off
	log using "$logdir\dataprep_death_dates", text replace
	
	* Add the dates of death to the focal child data
	
	use "$datadir/$fc_all_data", clear
	merge 1:1 pnr using "$datadir\death_dates", keep(match master)
	drop _merge
	compress
	save "$datadir/$fc_all_data", replace
	
	log close
	
end


**********************
**** Data preparation: information about stillborn babies
**********************

program define dataprep_stillborn
	
	set more off
	log using "$logdir\dataprep_stillborn", text replace
	tempfile all_stillborn one_stillborn
	
	* Prepare the stillborn data
	
	use "$datadir/stillborns", clear
	keep pnrm *_date *_gender
	duplicates drop
	sort pnrm
	compress
	save "`all_stillborn'", replace
	
	forval i = 1 / 3 {
		use "`all_stillborn'", clear
		rename still`i'_gender male
		rename still`i'_date fodtdato
		keep if ~missing(fodtdato)
		keep pnrm male fodtdato
		compress
		save "`one_stillborn'", replace
		
		use "$datadir/$fc_all_data", clear
		merge m:1 pnrm male fodtdato using "`one_stillborn'", keep(match master)
		generate potential_stillborn`i' = (_merge == 3)
		drop _merge
		save "$datadir/$fc_all_data", replace
	}
	
	use "$datadir/$fc_all_data", clear
	generate potential_stillborn = potential_stillborn1 | potential_stillborn2 | potential_stillborn3
	label variable potential_stillborn "Newborn could be a stillborn or have a stillborn twin"
	drop potential_stillborn?
	egen family_stillborn = max(potential_stillborn), by(pnrm fodtdato)
	label variable family_stillborn "Indicator for potential stillborn babies in the family"
	generate singleton_stillborn = potential_stillborn & (~multiple)
	label variable singleton_stillborn "Newborn could be a stillborn or only live birth in a multiple birth"
	compress
	save "$datadir/$fc_all_data", replace
	
	log close
	
end


**********************
**** Data preparation: information about academic achievement
**********************

program define dataprep_academic_achievement
	
	set more off
	log using "$logdir\dataprep_academic_achievement", text replace
	
	* Prepare the stillborn data
	
	use "$datadir/$fc_all_data", clear
	merge 1:1 pnr using "$datadir/track_260116", keep(match master)
	drop _merge
	generate enrolled_beyond = ac_track | voc_track if ~missing(ac_track)
	compress
	save "$datadir/$fc_all_data", replace
	
	log close
	
end


**********************
**** Data preparation: add hospitalization info for focal children
**********************

program define dataprep_fc_hosp
	
	set more off
	log using "$logdir\dataprep_fc_hosp", text replace
	tempfile healthfc
	
	* Create the list of focal children and their birth dates
	
	use "$datadir/$fc_all_data", clear
	keep pnr pnrm fodtdato deathdate
	save "$datadir\fclist", replace
	
	* Create the list of admissions for the focal children
	
	use "$datadir\fclist", clear
	merge 1:m pnr using "$datadir\hospital", keepusing(inddto pattype henm) keep(match)  /* took out "master" */
	duplicates drop pnr inddto, force
	drop _merge
	compress
	
	* Create indicators for hospital admission up to X years after birth
	
	keep if inlist(pattype, 0, 3)
	local nadm ""
	local ner ""
	forval yab = 0 / $yab {
		generate adm_`yab'y = (`yab' * 365 <= (inddto - fodtdato)) & ((inddto - fodtdato) < (`yab' + 1) * 365) ///
			if pattype == 0
		local nadm "`nadm' nadm_`yab'y = adm_`yab'y"
		generate er_`yab'y = (`yab' * 365 <= (inddto - fodtdato)) & ((inddto - fodtdato) < (`yab' + 1) * 365) ///
			if pattype == 3
		local ner "`ner' ner_`yab'y = er_`yab'y"
	}
	
	* Create variables for any and number of hospital admissions up to X years after birth
	
	collapse (max) adm_* er_* (sum) `nadm' `ner', by(pnr)
	forval yab = 0 / $yab {
		label variable adm_`yab'y "In-patient hospital admission at age `yab'"
		label variable nadm_`yab'y "Number of in-patient hospital admissions at age `yab'"
		label variable er_`yab'y "ER visits at age `yab'"
		label variable ner_`yab'y "Number of ER visits at age `yab'"
	}
	compress
	save "`healthfc'", replace
	
	* Merge back to our analysis sample
	
	use "$datadir/$fc_all_data", clear
	merge 1:1 pnr using "`healthfc'"
	drop _merge
	foreach var of varlist adm_* nadm_* {
		replace `var' = 0 if `var' == .
	}
	forval yab = 0 / $yab {
		replace er_`yab'y = . if birthyear + `yab' < 1995
		replace er_`yab'y = 0 if (birthyear + `yab' >= 1995) & (er_`yab'y == .)
		replace ner_`yab'y = . if birthyear + `yab' < 1995
		replace ner_`yab'y = 0 if (birthyear + `yab' >= 1995) & (ner_`yab'y == .)
		replace er_`yab'y = . if (deathdate - fodtdato) < `yab' * 365 & deathdate ~= .
		replace ner_`yab'y = . if (deathdate - fodtdato) < `yab' * 365 & deathdate ~= .
		replace adm_`yab'y = . if (deathdate - fodtdato) < `yab' * 365 & deathdate ~= .
		replace nadm_`yab'y = . if (deathdate - fodtdato) < `yab' * 365 & deathdate ~= .
	}
	
	egen cadm_05y = rowmax(adm_0y adm_1y adm_2y adm_3y adm_4y adm_5y)
	egen cnadm_05y = rowtotal(nadm_0y nadm_1y nadm_2y nadm_3y nadm_4y nadm_5y), missing
	egen cadm_15y = rowmax(adm_1y adm_2y adm_3y adm_4y adm_5y)
	egen cnadm_15y = rowtotal(nadm_1y nadm_2y nadm_3y nadm_4y nadm_5y), missing
	egen cadm_610y = rowmax(adm_6y adm_7y adm_8y adm_9y adm_10y)
	egen cnadm_610y = rowtotal(nadm_6y nadm_7y nadm_8y nadm_9y nadm_10y), missing
	egen cadm_1115y = rowmax(adm_11y adm_12y adm_13y adm_14y adm_15y)
	egen cnadm_1115y = rowtotal(nadm_11y nadm_12y nadm_13y nadm_14y nadm_15y), missing
	
	egen cer_610y = rowmax(er_6y er_7y er_8y er_9y er_10y)
	replace cer_610y = . if birthyear < 1989
	egen cner_610y = rowtotal(ner_6y ner_7y ner_8y ner_9y ner_10y), missing
	replace cner_610y = . if birthyear < 1989
	egen cer_1115y = rowmax(er_11y er_12y er_13y er_14y er_15y)
	replace cer_1115y = . if birthyear < 1984
	egen cner_1115y = rowtotal(ner_11y ner_12y ner_13y ner_14y ner_15y), missing
	replace cner_1115y = . if birthyear < 1984
	
	compress
	save "$datadir/$fc_all_data", replace
	
	log close
	
end


**********************
**** Data preparation: add diagnosis data for focal children
**********************

program define dataprep_fc_diagnosis
	
	set more off
	log using "$logdir\dataprep_fc_diagnosis", text replace
	tempfile diags
	
	* Create the list of focal children and their birth dates
	
	use pnr fodtdato using "$datadir/$fc_all_data", clear
	merge 1:m pnr using "$datadir\diagnosis", keep(match)
	drop _merge
	compress
	
	* Create indicators for diagnoses to X years after birth
	
	local diaglist ""
	local diagvarlist ""
	foreach var of varlist *_date {
		local diag = subinstr("`var'", "_date", "", .)
		local diagvarlist "`diagvarlist' `diag'_*y"
		local diaglist "`diaglist' `diag'"
		forval yab = 0 / $yab {
			generate `diag'_`yab'y = (`yab' * 365 <= (`diag'_date - fodtdato)) & ((`diag'_date - fodtdato) < (`yab' + 1) * 365)
		}
	}
	collapse (max) `diagvarlist', by(pnr)
	save "`diags'", replace
	
	* Merge back to our analysis sample
	
	use "$datadir/$fc_all_data", clear
	merge 1:1 pnr using "`diags'"
	drop _merge
	foreach diag in `diaglist' {
		forval yab = 0 / $yab {
			replace `diag'_`yab'y = 0 if `diag'_`yab'y == .
			replace `diag'_`yab'y = . if (deathdate - fodtdato) < `yab' * 365 & deathdate ~= .
		}
		egen c`diag'_05y = rowmax(`diag'_0y `diag'_1y `diag'_2y `diag'_3y `diag'_4y `diag'_5y)
		egen c`diag'_15y = rowmax(`diag'_1y `diag'_2y `diag'_3y `diag'_4y `diag'_5y)
		egen c`diag'_010y = rowmax(c`diag'_05y `diag'_6y `diag'_7y `diag'_8y `diag'_9y `diag'_10y)
		egen c`diag'_015y = rowmax(c`diag'_010y `diag'_11y `diag'_12y `diag'_13y `diag'_14y `diag'_15y)
	}
	
	compress
	save "$datadir/$fc_all_data", replace
	
	log close
	
end


**********************
**** Data preparation: add info on older and younger siblings, including hospitalizations
**********************

program define dataprep_siblings
	
	set more off
	log using "$logdir\dataprep_siblings", text replace
	tempfile healths
	
	* Create the list of focal children and their birth dates
	
	use pnr pnrm fodtdato deathdate using "$datadir/$fc_all_data", clear
	save "$datadir\fclist", replace
	
	* Create the list of "siblings" and their birth dates
	
	use pnr pnrm fodtdato birthyear birth_order bw ga male multiple dansk_m_prove1 mat_s_prove1 parenthood20 ///
		using "$datadir/$origdata", clear
	* Add birth dates (not in "original data")
	merge 1:1 pnr using "$datadir\death_dates", keep(match master) keepusing(deathdate)
	drop _merge
	* Add academic achievement (not in "original data")
	merge 1:1 pnr using "$datadir/track_260116", keep(match master)
	drop _merge
	generate enrolled_beyond = ac_track | voc_track if ~missing(ac_track)
	compress
	foreach var of varlist _all {
		if "`var'" ~= "pnrm" rename `var' sib_`var'
	}
	egen temp = mean(sib_bw), by(sib_birthyear)
	replace sib_bw = temp if sib_bw == .
	drop temp
	save "`healths'", replace
	
	* Create the list of siblings
	
	use "$datadir\fclist", clear
	joinby pnrm using "`healths'"
	drop if pnr == sib_pnr
	generate older_sib = (sib_fodtdato < fodtdato - 100)
	generate younger_sib = (sib_fodtdato > fodtdato + 100)
	save "$datadir\sibling_pairs", replace
	
	* Create the list of admissions for the siblings
	
	use "$datadir\sibling_pairs", clear
	drop if (older_sib == 0 & younger_sib == 0)
	rename pnr pnrfc
	rename sib_pnr pnr
	
	joinby pnr using "$datadir\hospital"
	drop recnum adiag sgh diag diagtype
	duplicates drop pnr pnrfc inddto, force
	compress
	
	* Create indicators for hospital admission up to 10 years after birth
	
	keep if inlist(pattype, 0, 3)
	local nadm ""
	local ner ""
	forval yab = 0 / $yab {
		generate adm_`yab'y = (`yab' * 365 <= (inddto - fodtdato)) & ((inddto - fodtdato) < (`yab' + 1) * 365) ///
			if pattype == 0
		local nadm "`nadm' nadm_`yab'y = adm_`yab'y"
		generate er_`yab'y = (`yab' * 365 <= (inddto - fodtdato)) & ((inddto - fodtdato) < (`yab' + 1) * 365) ///
			if pattype == 3
		local ner "`ner' ner_`yab'y = er_`yab'y"
	}
	
	* Create variables for any and for the numer of hospital admissions up to 15 years after birth
	
	collapse (max) adm_* er_* older_sib younger_sib (sum) `nadm' `ner', by(pnr pnrfc)
	forval yab = 0 / $yab {
		label variable adm_`yab'y "In-patient hospital admission of sibling at age `yab' of focal child"
		label variable nadm_`yab'y "Numer of in-patient hospital admissions of sibling at age `yab' of focal child"
		label variable er_`yab'y "ER visits of sibling at age `yab' of focal child"
		label variable ner_`yab'y "Numer of ER visits of sibling at age `yab' of focal child"
	}
	foreach var of varlist adm_* nadm_* er_* ner_* {
		rename `var' sib_`var'
	}
	compress
	rename pnr sib_pnr
	rename pnrfc pnr
	save "`healths'", replace
	
	* Merge back to our analysis sample
	
	use "$datadir\sibling_pairs", clear
	keep if (older_sib == 1 | younger_sib == 1)
	merge 1:1 pnr sib_pnr using "`healths'"
	drop _merge
	foreach var of varlist sib_adm_* sib_nadm_* {
		replace `var' = 0 if `var' == .
	}
	forval yr = 0 / $yab {
		replace sib_er_`yr'y = . if year(fodtdato) + `yr' < 1995
		replace sib_er_`yr'y = 0 if (year(fodtdato) + `yr' >= 1995) & (sib_er_`yr'y == .)
		replace sib_ner_`yr'y = . if year(fodtdato) + `yr' < 1995
		replace sib_ner_`yr'y = 0 if (year(fodtdato) + `yr' >= 1995) & (sib_ner_`yr'y == .)
	}
	
	egen sib_cadm_05y = rowmax(sib_adm_0y sib_adm_1y sib_adm_2y sib_adm_3y sib_adm_4y sib_adm_5y)
	egen sib_cnadm_05y = rowtotal(sib_nadm_0y sib_nadm_1y sib_nadm_2y sib_nadm_3y sib_nadm_4y sib_nadm_5y), missing
	egen sib_cadm_15y = rowmax(sib_adm_1y sib_adm_2y sib_adm_3y sib_adm_4y sib_adm_5y)
	egen sib_cnadm_15y = rowtotal(sib_nadm_1y sib_nadm_2y sib_nadm_3y sib_nadm_4y sib_nadm_5y), missing
	egen sib_cadm_610y = rowmax(sib_adm_6y sib_adm_7y sib_adm_8y sib_adm_9y sib_adm_10y)
	egen sib_cnadm_610y = rowtotal(sib_nadm_6y sib_nadm_7y sib_nadm_8y sib_nadm_9y sib_nadm_10y), missing
	egen sib_cadm_1115y = rowmax(sib_adm_11y sib_adm_12y sib_adm_13y sib_adm_14y sib_adm_15y)
	egen sib_cnadm_1115y = rowtotal(sib_nadm_11y sib_nadm_12y sib_nadm_13y sib_nadm_14y sib_nadm_15y), missing
	
	egen sib_cer_610y = rowmax(sib_er_6y sib_er_7y sib_er_8y sib_er_9y sib_er_10y)
	replace sib_cer_610y = . if year(fodtdato) < 1989
	egen sib_cner_610y = rowtotal(sib_ner_6y sib_ner_7y sib_ner_8y sib_ner_9y sib_ner_10y), missing
	replace sib_cner_610y = . if year(fodtdato) < 1989
	egen sib_cer_1115y = rowmax(sib_er_11y sib_er_12y sib_er_13y sib_er_14y sib_er_15y)
	replace sib_cer_1115y = . if year(fodtdato) < 1984
	egen sib_cner_1115y = rowtotal(sib_ner_11y sib_ner_12y sib_ner_13y sib_ner_14y sib_ner_15y), missing
	replace sib_cner_1115y = . if year(fodtdato) < 1984
	
	gen sib_age = abs(fodtdato - sib_fodtdato) / 365
	gen agediff_old = (fodtdato - sib_fodtdato) / 365 if fodtdato > sib_fodtdato
	gen agediff_young = -(fodtdato - sib_fodtdato) / 365 if fodtdato < sib_fodtdato
	
	foreach yr in "05" 10 15 {
		foreach var of varlist sib_*`yr'y {
			di "replace `var' = . if agediff_young > `yr'"
		}
	}
	
	compress
	save "`healths'", replace
	
	use using "$datadir\focalchild", clear
	merge 1:m pnr using "`healths'", keep(match master)
	drop _merge
	drop if missing(sib_pnr)
	compress
	save "$datadir\siblings", replace
	
	use pnr younger_sib	using "`healths'", clear
	collapse (sum) younger_sibs = younger_sib, by(pnr)
	save "`healths'", replace
	use using "$datadir\focalchild", clear
	merge 1:1 pnr using "`healths'", keep(match master)
	drop _merge
	replace younger_sibs = 0 if younger_sibs == .
	compress
	save "$datadir\focalchild", replace
	
	log close
	
end


**********************
**** Data preparation: Generate predicted outcomes and some additional variables needed in the regressions; drop observations
**** with missing birth weight and gestational age; get summary statistics for included vs dropped observations
**********************

program define dataprep_add_vars
	
	set more off
	
	local fc_all_prefix `" "" "'
	local sib_prefix `" "" sib_"'
	
	local add_controls "apgar dad_missing dad_age dad_elen_impute f_education_mis dad_immigrant m_employed m_employed_mis"
	local add_controls "`add_controls' f_employed f_employed_mis m_income f_income total_income *took_* *_before"
	local fc_all_add_controls "family_size younger_sibs has_younger_sibs"
	local sib_add_controls "agediff_*"
	local fc_add_controls "`fc_all_add_controls'"
	
	local add_controls "apgar dad_missing dad_age dad_elen_impute f_education_mis dad_immigrant m_employed m_employed_mis"
	local add_controls "`add_controls' f_employed f_employed_mis m_income f_income total_income *took_* *_before"
	local pred_outcomes "pred_*"
	
	local fc_all_controls: subinstr global fc_all_controls "i." "", all
	local sib_controls: subinstr global sib_controls "i." "", all
	local sib_controls "`sib_controls' sib_age"
	local fc_controls "`fc_all_controls'"	
	
	log using "$logdir\dataprep_dropped_obs.log", text replace
	
	foreach kid in fc_all sib {
		use "$datadir/${`kid'_data}", clear
		
		* Drop existing predicted outcomes, if applicable, and any other previously generated variables
		
		capture drop pred_*
		capture drop took_*
		capture drop sib_took*
		
		* Recode income variables based on missing indicators
		
		replace m_income = . if m_income_mis == 1
		replace f_income = . if f_income_mis == 1
		replace total_income = . if (m_income_mis == 1 & f_income_mis == 1)
			
		* Generate indicators for having taken the test
		
		foreach prefix in ``kid'_prefix' {
			foreach test in mat_s_prove1 dansk_m_prove1 {
				generate `prefix'took_`test' = (`prefix'`test' ~= .) if `prefix'birthyear >= 1986
			}
		}
		
		* Generate predicted outcomes in the full sample
		
		foreach var of varlist ${`kid'_outcomes} cmr_0*y cadhd_0*y {
			regress `var' ${`kid'_controls}
			capture predict pred_`var'
		}
		
		* Drop if missing birth weight and gestational age
		
		display "Summary statistics for dropped observations, `kid' sample"
		generate to_drop = missing(bw) | missing(ga)
		display "Missing birth weight"
		tab birthyear if missing(bw)
		display "Missing gestational age"
		tab birthyear if missing(ga)
		if "`kid'" == "fc" generate has_younger_sibs = (younger_sibs ~= 0)
		estpost ttest bw ``kid'_controls' `pred_outcomes' ${`kid'_outcomes} ///
			`add_controls' ``kid'_add_controls' ``kid'_mecs' if inrange(birthyear, $startyr, $endyr), by(to_drop) unequal
		esttab using "$resdir/`kid'_dropped_obs.csv", scsv replace prehead("sep=;") star(* 0.10 ** 0.05 *** 0.01) ///
			cells((mu_1(fmt(3)) N_1(fmt(0)) mu_2(fmt(3)) N_2(fmt(0)) t(star abs fmt(3)) p(fmt(3)) _star))
		if "`kid'" == "fc" drop has_younger_sibs
		drop if to_drop
		drop to_drop
		
		* Add information on year when 9th grade tests were taken
		
		if "`kid'" == "fc_all" {
			merge 1:1 pnr using "$datadir/year_completed", keep(match master)
			drop _merge
			generate age_at_test = (ym(year_completed, 5) - ym(year(fodtdato), month(fodtdato))) / 12
		}
		if "`kid'" == "sib" {
			merge m:1 pnr using "$datadir/year_completed", keep(match master)
			drop _merge
			generate age_at_test = (ym(year_completed, 5) - ym(year(fodtdato), month(fodtdato))) / 12
			rename pnr temp_pnr
			rename year_completed temp_year_completed
			rename sib_pnr pnr
			merge m:1 pnr using "$datadir/year_completed", keep(match master)
			drop _merge
			rename year_completed sib_year_completed
			rename pnr sib_pnr
			rename temp_pnr pnr
			rename temp_year_completed year_completed
			generate sib_age_at_test = (ym(sib_year_completed, 5) - ym(year(sib_fodtdato), month(sib_fodtdato))) / 12
		}
		
		* In the siblings sample, create an indicator for one observation per focal child and then add the predicted
		* outcomes for focal children
		
		if "`kid'" == "sib" {
			sort pnr sib_pnr
			tempvar temp1 temp2
			generate `temp1' = (sib_mat_s_prove1 ~= . | sib_dansk_m_prove1 ~= .)
			egen `temp2' = max(`temp1'), by(pnr)
			egen fc_tag = tag(pnr `temp1')
			replace fc_tag = 0 if `temp1' ~= `temp2'
			drop `temp1' `temp2'
			
			merge m:1 pnr using "$datadir/$fc_all_data", keepusing(pred_*) keep(match master)
			drop _merge
		}
		
		compress
		save "$datadir/${`kid'_data}", replace
	}

	log close
	
end


**********************
**** Data preparation: Create indices
**********************

program define dataprep_create_indices
  
  * Add some variables (enrollment in university or higher education, etc.)
  
	display "Add some more variables to the data:"
  quietly {
    use "$datadir/$fc_sib_data" if inrange(birthyear, $startyr, $endyr), clear
    save "$datadir/$data_restat", replace
	}
	
  * Enrollment in university or higher education
  
  display "  - enrollment in university or higher education"
  tempfile higher
  quietly {
    use "$datadir/$add_educ", clear
    foreach var of varlist _all {
      rename `var' sib_`var'
    }
    save "`higher'", replace
    use "$datadir/$data_restat", clear
    merge m:1 pnr using "$datadir/$add_educ", keep(match master) nogen
    merge m:1 sib_pnr using "`higher'", keep(match master) nogen
    save "$datadir/$data_restat", replace
  }
  
  * Parental mental health outcomes
  
  display "  - parental mental health outcomes"
  quietly {
    merge m:1 pnr using "$datadir/$add_mental_health", keep(match master) nogen
    save "$datadir/$data_restat", replace
  }
  
  * Parental labor market outcomes
  
  display "  - parental labor market outcomes"
  quietly {
    merge m:1 pnr using "$datadir/$add_labor_force", keep(match master) nogen keepusing(m_*1? f_*1?)
    save "$datadir/$data_restat", replace
  }
  
  display
  
  * Generate some preliminary variables needed for constructing indices
  
	display "Generate some of the variables needed to create indices"
  quietly use "$datadir/$data_restat", clear
  
	* Generate real income (mother, father, family) X years after birth
	
	display "  - real income"
	tempvar cpi
	forval i = 0 / 15 {
	  quietly {
  		generate `cpi' = .
  		forval yr = $startyr / $endyr {
  			local yr_ab = `yr' + `i'
  			replace `cpi' = ${cpi_`yr_ab'} if birthyear == `yr'
  		}
  		generate real_m_income`i' = m_income`i' / (`cpi' * 10)
  		generate real_f_income`i' = f_income`i' / (`cpi' * 10)
  		drop `cpi'
	  }
	}
	
	foreach mec in real_m_income real_f_income m_intsup f_intsup m_extsup f_extsup m_antidep f_antidep {
  	display "  - `mec'"
	  foreach age_gr in $age_gr_list {
	    if strlen("`age_gr'") < 4 local start_len 1
	    else local start_len 2
	    local start_age = substr("`age_gr'", 1, `start_len')
	    local end_age = substr("`age_gr'", `=`start_len' + 1', .)

	    if strpos("`mec'", "antidep") > 0 & `start_age' == 0 local start_age = 2
	    
	    local mec_list ""
	    forval i = `start_age' / `end_age' {
	      local mec_list: display"`mec_list' `mec'`i'"
	    }
	    
	    quietly {
	      if strpos("`mec'", "income") > 0 | strpos("`mec'", "intsup") > 0 ///
	        gegen c`mec'_`age_gr'y = rowmean(`mec_list')
	      else ///
	        if strpos("`mec'", "antidep") > 0 & `start_age' == 2 ///
	          gegen c`mec'_25y = rowmax(`mec_list')
	        else ///
	          gegen c`mec'_`age_gr'y = rowmax(`mec_list')
	      if strpos("`mec'", "income") > 0 {
    	    generate ln_c`mec'_`age_gr'y = log(c`mec'_`age_gr'y)
    	    replace ln_c`mec'_`age_gr'y = 0 if c`mec'_`age_gr'y == 0
	      }
	    }
	  }
	}
	
	* Restrict divorce information to families where dad is present at birth and to first divorce
	
	display "  - divorce"
	tempvar divorce
	forval i = 0 / 10 {
	  quietly {
  		replace divorce_year`i' = . if dad_missing == 1
  		local dlist ""
  		forval j = 0 / `=`i'-1' {
  			local dlist "`dlist' divorce_year`j'"
  		}
  		if "`dlist'" ~= "" {
  			gegen `divorce' = rowtotal(`dlist'), missing
  			replace divorce_year`i' = 0 if `divorce' == 1
  			drop `divorce'
  		}
	  }
	}
	foreach mec in divorce_year {
	  quietly {
  		gegen c`mec'_05y = rowmax(`mec'0 `mec'1 `mec'2 `mec'3 `mec'4 `mec'5)
  		gegen c`mec'_610y = rowmax(`mec'6 `mec'7 `mec'8 `mec'9 `mec'10)
  		// gegen c`mec'_1115y = rowmax(`mec'11 `mec'12 `mec'13 `mec'14 `mec'15)
	  }
	}
  
	display "  - missing values for sibling variables"
	quietly {
	  foreach age_gr in 05 610 1115 {
	    if strlen("`age_gr'") < 4 local start_len 1
	    else local start_len 2
	    local start_age = substr("`age_gr'", 1, `start_len')
	    local end_age = substr("`age_gr'", `=`start_len' + 1', .)
  	  foreach var of varlist sib_*_`age_gr'y {
  	    replace `var' = . if sib_birthyear - birthyear > `end_age'
  	  } 
	  }
	  replace sib_ac_track_at_18 = . if sib_enrolled_beyond == .
  }
  
  display "  - variables for the age-range 6-15"
  quietly {
    gegen cadm_615y = rowmax(cadm_610y cadm_1115y)
		gegen cer_615y = rowmax(cer_610y cer_1115y)
    gegen sib_cadm_615y = rowmax(sib_cadm_610y sib_cadm_1115y)
		gegen sib_cer_615y = rowmax(sib_cer_610y sib_cer_1115y)
  }
  
	display "  - missing values for dad variables if dad is missing"
	quietly {
	  foreach var of varlist dad_* f_* {
	    if "`var'" != "dad_missing" {
	      replace `var' = . if dad_missing == 1
	      replace `var' = . if dad_missing == 0 & dad_age == .
	    }
	  }
	}
	
	quietly save "$datadir/$data_restat", replace
  
  * Generate indices
  
  quietly use "$datadir/$data_restat", clear
  
  tempvar mean sd
  local to_drop ""
  
  display "Creating indices:"
  foreach pers in $index_list {
    display "  - `pers': "
    foreach idx in ${index_list_`pers'} {
      display "    * `idx': "
      local idx_vars ""
      foreach var of varlist ${`pers'_`idx'} {
        display "      - `var'"
        quietly {
          tempvar idx_`var'
          if strpos("`var'", "prove1") == 0 {
            gegen `mean' = mean(`var'), by(birthyear)
            gegen `sd' = sd(`var'), by(birthyear)
            generate `idx_`var'' = (`var' - `mean') / `sd'
            drop `mean' `sd'
          }
          else generate `idx_`var'' = `var'
          local idx_vars: display "`idx_vars' `idx_`var''"
        }
      }
      display "      - index"
      quietly {
        gegen idx_`pers'_`idx' = rowtotal(`idx_vars'), missing
        gegen `sd' = sd(idx_`pers'_`idx')
        replace idx_`pers'_`idx' = idx_`pers'_`idx' / `sd'
        drop `sd'
        regress idx_`pers'_`idx' bw ${`pers'_controls}
        predict pred_idx_`pers'_`idx'
      }
      local to_drop: display "`to_drop' `idx_vars'"
    }
  }
  drop `to_drop'
  
  quietly {
    compress
    save "$datadir/$data_restat", replace
  }
  
end



*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*
*  II. PRELIMINARY ANALYSES AND VALIDATION CHECKS
*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*

**********************
**** Preliminary analyses: Calculate the optimal bandwidth according to Calonico et al. (2014)
**********************

program define prelim_bandwidth
	
	set more off
	
	log using "$logdir\prelim_bandwidth.log", text replace
	
	local m32 "ga >= 32"
	local ga "m32"

	* Run the regressions
	
  foreach pers in $index_list {
    display as text "Calculating bandwidths for " as input "`pers'" as text ":"
  	use "$datadir/$data_restat" if inrange(birthyear, $startyr, $endyr) & ``ga'' & ${`pers'_cond}, clear
  	
		* Keep only observations in the relevant range of birth weights (symmetric around 1,500g)
		
		quietly {
			sum bw
			keep if inrange(bw, r(min), 1500 + (1500 - r(min)))
		}
    
		capture matrix drop hcombined
		tempvar wgt pred_ll pred_poly sumsq
		local colnames ""
		local colnames_both ""
		
    foreach idx in ${index_list_`pers'} {
			display as text "  - " as result "`idx'"
			
			local vars: display "${`pers'_`idx'}"
			
  		foreach var of varlist idx_`pers'_`idx' `vars' {
				display as text "    * " as result "`var'"
				
				local colnames: display "`colnames' `var'"
				local cct_list ""
				quietly rdbwselect `var' bw, c(1500) kernel(triangular) all
				foreach bw_method in mse cer {
					foreach bw_type in h b {
						scalar cct_`bw_type'_`bw_method'rd = `e(`bw_type'_`bw_method'rd)'
						scalar cct_`bw_type'_`bw_method'two_l = `e(`bw_type'_`bw_method'two_l)'
						scalar cct_`bw_type'_`bw_method'two_r = `e(`bw_type'_`bw_method'two_r)'
						if "`cct_list'" == "" {
							local cct_list "cct_`bw_type'_`bw_method'rd \ cct_`bw_type'_`bw_method'two_l \ cct_`bw_type'_`bw_method'two_r"
						}
						else {
							local cct_list: display "`cct_list' \ cct_`bw_type'_`bw_method'rd \ cct_`bw_type'_`bw_method'two_l " ///
								"\ cct_`bw_type'_`bw_method'two_r"
						}
					}
				}
				
				matrix hcombined = nullmat(hcombined), (`cct_list')
			}
			matrix rownames hcombined = MSE_h MSE_h(L) MSE_h(R) MSE_b MSE_b(L) MSE_b(R) ///
				CER_h CER_h(L) CER_h(R) CER_b CER_b(L) CER_b(R)
			matrix colnames hcombined = `colnames'
			quietly {
  			esttab matrix(hcombined) using "$resdir/bandwidth_`pers'.csv", scsv replace prehead("sep=;")
  			esttab matrix(hcombined) using "$resdir/Saved_Restat/optimal_bw_`pers'.csv", csv replace nomtitles plain
			}
		}
	}
	
	log close

end


**********************
**** Preliminary analyses: McCrary Test
**********************

program define prelim_mccrary

  eststo clear
  capture log close
  set more off
  log using "$logdir\prelim_mccrary.log", text replace
  
  display _newline(2) "== CONDUCTING THE McCRARY TEST ==" _newline(2)
  
  local all "1 == 1"
	local m32 "ga >= 32"
	local l32 "ga < 32"
  
	local a `"prehead("sep=;" "\`kid\'\`i\' - \${\`kid\'_cond\`i\'}" "") replace"'
	foreach kid in sib fc {
    eststo clear
    foreach ga in m32 l32 all {
      use "$datadir/${`kid'_data}" if inrange(birthyear, $startyr, $endyr) & ``ga'' & bw < 2000 & ${`kid'_cond}, clear
      keep pnr bw
      
      * Get the number of observations (not bins)
      
      quietly count if inrange(bw, $ll, $ul)
      local N_obs = r(N)
      
      * Collapse the data in 10-gram bins
    
      generate obs = 1
      egen bin = cut(bw), at(1000(10)2000)
      collapse (sum) obs, by(bin)
      
      * Generate some variables needed for the regression-based test
      
      gen vlbw = (bin < 1500)
      gen f1 = (bin - 1500) * vlbw
      gen f2 = (bin - 1500) * (1 - vlbw)
      gen twgt = 1 - abs((bin - 1500) / $bw)
      gen heap50_all = trunc(bin / 50) * (mod(bin, 50) == 0)
      
      * Run regressions using the number of observations in the bin as dependent variable

      eststo, title(10g-CCT-heap50): rdrobust2 obs bin, c(1500) h($bw) covs(i.heap50_all)
      estadd scalar N_obs = `N_obs'
      
      * Run regressions using the log of the number of observations in the bin as dependent variable
      
      eststo, title(10g-CCT-ln-heap50): rdrobust2 ln_obs bin, c(1500) h($bw) covs(i.heap50_all)
      estadd scalar N_obs = `N_obs'
      
      * Plot the histogram
      
      quietly sum obs if inrange(bin, $ll, $ul)
      local max = r(max)
      local max = ceil(`max' / 20) * 20
      
      twoway bar obs bin if inrange(bin, $ll, $ul), barwidth(10) ///
        fcolor(white) lcolor(black) xline(1500, lcolor(black) lpattern(dot)) ///
        ytitle("Number of observations", size(small)) ylabel(0(20)`max', labsize(vsmall) nogrid) ///
        xtitle("Birth weight (grams)", size(small)) xlabel($ll (50) $ul, labsize(vsmall) nogrid) ///
        graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) legend(off)
      graph export "$figdir/`kid'_hist_bw_`ga'.png", replace width(2048)
      graph export "$figdir/`kid'_hist_bw_`ga'.eps", replace logo(off)
    }
    esttab using "$resdir\mccrary.csv", scsv mtitles nogaps collabels(none) compress `a' ///
      cells(b(fmt(3) star pvalue(pv_robust)) tau(par([ ]) fmt(3)) se(par fmt(3))) ///
      rename(RD_Estimate vlbw) keep(vlbw) star(* 0.10 ** 0.05 *** 0.01) ///
      stats(N N_obs, fmt(%9.0fc %9.0fc) labels("Bins" "Observations"))
    local a `"prehead("" "\`kid\'\`i\' - \${\`kid\'_cond\`i\'}" "") append"'
  }
	
	log close
	
end


**********************
**** Preliminary analyses: Graphs of child outcomes and covariates
**********************

program define prelim_figures
	set more off
	
	local all "1 == 1"
	local m32 "ga >= 32"
	local l32 "ga < 32"
	
	local add_controls "apgar dad_age dad_elen f_education_mis m_income f_income total_income took*"
	
	local fc_all_controls: subinstr global fc_all_controls "i." "", all
	local sib_controls: subinstr global sib_controls "i." "", all
	local sib_controls "`sib_controls' sib_age agediff_*"
	local fc_controls: subinstr global fc_controls "i." "", all
	
	foreach kid in sib fc fc_all {
    eststo clear
    foreach ga in m32 l32 /* all */ {
      local ll = 1500 - $graph_bw
      local ul = 1500 + $graph_bw + 1
      
      use "$datadir/${`kid'_data}" if inrange(bw, `ll', `ul') & inrange(birthyear, $startyr, $endyr) & ///
        ``ga'' & ${`kid'_cond}, clear
      
      local pred_outcomes ""
      foreach var of varlist ${`kid'_outcomes} {
        local pred_outcomes "`pred_outcomes' pred_`var'"
      }
  
      egen bin = cut(bw), at(`ll'($graph_agg)1499 1501($graph_agg)`ul')
      drop if bw == 1500
      collapse (mean) ``kid'_controls' `add_controls' ${`kid'_outcomes} `pred_outcomes', by(bin)
      sort bin
      
      local ul = `ul' - 1
      
      foreach var of varlist ``kid'_controls' `add_controls' ${`kid'_outcomes} `pred_outcomes' {
        quietly sum `var'
        local rmin = r(min)
        local rmax = r(max)
        local range = `rmax' - `rmin'
        local rmid = `rmin' + `range' / 2
        
        local round = 10
        while `range' / `round' < 1 {
          local round = `round' / 10
        }
        local step = cond(trunc(`range'/`round') <= 1.5, `round', ///
          cond(trunc(`range'/`round') <= 3.5, `round' * 2.5, ///
          cond(trunc(`range'/`round') <= 6.5, `round' * 5,  `round' * 7.5)))
        local min = round(`rmin' - 2 * `step', `step')
        local max = round(`rmin' + 2 * `step', `step')
        if (`rmid' < `min' + 2 * `step') & (`min' - `step' >= 0) {
          local min = `min' - `step'
          local max = `max' - `step'
        }
        else if `rmid' > `min' + 3 * `step' {
          local min = `min' + `step'
          local max = `max' + `step'
        }
        if `rmin' >= 0 local min = max(`min', 0)
        
        capture noisily {
          twoway (scatter `var' bin, msymbol(o) mlcolor(black) mfcolor(white) ) ///
            (lfit `var' bin if bin < 1500, lwidth(medium) lpattern(solid) lcolor(black)) ///
            (lfit `var' bin if bin >= 1500, lwidth(medium) lpattern(solid) lcolor(black)) ///
            if bin ~= 1500, xline(1500, lcolor(black) lpattern(dot)) ///
            ytitle("", size(small)) ylabel(`min'(`step')`max', labsize(vsmall) nogrid) ///
            xtitle("Birth weight (grams)", size(small)) xlabel(`ll' (50) `ul', labsize(vsmall) nogrid) ///
            graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) legend(off)
          graph export "$figdir/`kid'_fit_`ga'_`var'.png", replace width(2048)
        }
      }
    }
	}
	
end


**********************
**** Preliminary analyses: Graphs of parent outcomes
**********************

program define prelim_figures_parents
	set more off
	
	local all "1 == 1"
	local m32 "ga >= 32"
	local l32 "ga < 32"
	tempvar divorce cpi
	
  eststo clear
  foreach ga in m32 l32 {
    local ll = 1500 - $graph_bw
    local ul = 1500 + $graph_bw + 1
    
    use "$datadir/$fc_data" if inrange(bw, `ll', `ul') & inrange(birthyear, $startyr, $endyr) & ///
      ``ga'' & $fc_cond, clear
    
    forval k = 0 / 10 {
      generate `cpi' = .
      forval yr = $startyr / $endyr {
        local yr_ab = `yr' + `k'
        replace `cpi' = ${cpi_`yr_ab'} if birthyear == `yr'
      }
      generate real_m_income`k' = m_income`k' / (`cpi' * 10)
      generate real_f_income`k' = f_income`k' / (`cpi' * 10)
      egen real_total_income`k' = rowtotal(real_m_income`k' real_f_income`k'), missing
      drop `cpi'
    }
    
    foreach mec in real_m_income real_f_income real_total_income m_intsup f_intsup {
      egen c`mec'_05y = rowmean(`mec'0 `mec'1 `mec'2 `mec'3 `mec'4 `mec'5)
      egen c`mec'_610y = rowmean(`mec'6 `mec'7 `mec'8 `mec'9 `mec'10)
      * egen c`mec'_1115y = rowmean(`mec'11 `mec'12 `mec'13 `mec'14 `mec'15)
    }
    
    foreach mec in m_antidep f_antidep {
      egen c`mec'_25y = rowmax(`mec'2 `mec'3 `mec'4 `mec'5)
      egen c`mec'_610y = rowmax(`mec'6 `mec'7 `mec'8 `mec'9 `mec'10)
      egen c`mec'_1115y = rowmax(`mec'11 `mec'12 `mec'13 `mec'14 `mec'15)
    }
    
    * Restrict divorce information to families where dad is present at birth and to first divorce
    
    forval k = 0 / 10 {
      replace divorce_year`k' = . if dad_missing == 1
      local dlist ""
      forval j = 0 / `=`k'-1' {
        local dlist "`dlist' divorce_year`j'"
      }
      if "`dlist'" ~= "" {
        egen `divorce' = rowtotal(`dlist'), missing
        replace divorce_year`k' = 0 if `divorce' == 1
        drop `divorce'
      }
    }
    
    foreach mec in m_extsup f_extsup divorce_year {
      egen c`mec'_05y = rowmax(`mec'0 `mec'1 `mec'2 `mec'3 `mec'4 `mec'5)
      egen c`mec'_610y = rowmax(`mec'6 `mec'7 `mec'8 `mec'9 `mec'10)
      egen c`mec'_010y = rowmax(c`mec'_05y c`mec'_610y)
    }

    egen bin = cut(bw), at(`ll'($graph_agg)1499 1501($graph_agg)`ul')
    drop if bw == 1500
    collapse (mean) $fc_mecs, by(bin)
    sort bin
    
    local ul = `ul' - 1
    
    foreach var of varlist $mom_outcomes $dad_outcomes {
      quietly sum `var'
      local rmin = r(min)
      local rmax = r(max)
      local range = `rmax' - `rmin'
      local rmid = `rmin' + `range' / 2
      local min = `rmin' - (`rmid' - `rmin') * 4 
      local max = `rmax' + (`rmax' - `rmid') * 4
      local round = 10
      while `range' / `round' < 1 {
        local round = `round' / 10
      }
      local round = max(`round', round(`range', `round' * 5))
      local min = round(`min', `round')
      local max = round(`max', `round')
      local step = round((`max' - `min') / 5, `round')
      if strpos("`var'", "prove1") == 0 local min = max(`min', 0)
      local max = `min' + 5 * `step'
      if (`rmid' < `min' + 2 * `step') & (`min' - `step' >= 0) {
        local min = `min' - `step'
        local max = `max' - `step'
      }
      else if `rmid' > `min' + 3 * `step' {
        local min = `min' + `step'
        local max = `max' + `step'
      }
      
      capture noisily {
        twoway (scatter `var' bin, msymbol(o) mlcolor(black) mfcolor(white) ) ///
          (lfit `var' bin if bin < 1500, lwidth(medium) lpattern(solid) lcolor(black)) ///
          (lfit `var' bin if bin >= 1500, lwidth(medium) lpattern(solid) lcolor(black)) ///
          if bin ~= 1500, xline(1500, lcolor(black) lpattern(dot)) ///
          ytitle("", size(small)) ylabel(`min'(`step')`max', labsize(vsmall) nogrid) ///
          xtitle("Birth weight (grams)", size(small)) xlabel(`ll' (50) `ul', labsize(vsmall) nogrid) ///
          graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) legend(off)
        graph export "$figdir/fc_fit_`ga'_`var'.png", replace width(2048)
      }
    }
  }
	
end


**********************
**** Preliminary analyses: Covariate tests
**********************

program define prelim_covtests
	
  capture log close
  set more off
	tempvar cpi
	
  log using "$logdir\prelim_covtests.log", text replace
  
  display _newline(2) "== CONDUCTING COVARIATE TESTS ==" _newline(2)
  
	local all "1 == 1"
	local m32 "ga >= 32"
	local l32 "ga < 32"
	
	* Get the list of predicted outcomes relevant for each sample
	
	use "$datadir/$sib_data" in 1, clear
	unab pred_sib: pred_sib*
	unab pred: pred_*
	local pred_fc: list pred - pred_sib
	
	* Create lists of variables for which to get summary statistics
	
	local fc_all_add_controls: display "apgar dad_missing dad_age dad_elen_impute f_education_mis dad_immigrant" ///
    "m_employed m_employed_mis f_employed f_employed_mis real_m_income real_f_income real_total_income" ///
	  " *took_* *_before age_at_test family_size younger_sibs has_younger_sibs `pred_fc'"
	local fc_add_controls: display "`fc_all_add_controls'"
	local sib_add_controls: display "sib_age sib_vlbw agediff_* sib_age_at_test sib_took*" //
    " sib_dead28d_older sib_dead1y_older sib_bw_next agediff_next sib_bw_young `pred_sib' `fc_add_controls'"
	
	local fc_all_controls: subinstr global fc_all_controls "i." "", all
	local sib_controls: subinstr global sib_controls "i." "", all
	local fc_controls "`fc_all_controls'"
	
	* Generate the lists of controls used in regressions
	
	local nc "i.heap50_all"
	local c "i.heap50_all ${`kid'_controls}"
	
	foreach kid in sib fc /* fc_all */ {
		use "$datadir/${`kid'_data}" if inrange(bw, $ll, $ul) & inrange(birthyear, $startyr, $endyr), clear
		
		quietly {
			if "`kid'" == "fc_all" {
				generate has_younger_sibs = (younger_sibs ~= 0)
			}
			if "`kid'" == "fc" {
				capture egen younger_sibs = total(younger_sib), by(pnr)
				generate has_younger_sibs = (younger_sibs ~= 0)
			}
			if "`kid'" == "sib" {
				capture egen younger_sibs = total(younger_sib), by(pnr)
				generate has_younger_sibs = (younger_sibs ~= 0)
				capture generate sib_vlbw = sib_bw < 1500
        generate sib_dead28d_older = sib_dead28d * older_sib
        generate sib_dead1y_older = sib_dead1y * older_sib
        generate sib_bw_next = sib_bw if sib_birth_order == birth_order + 1
        generate agediff_next = agediff_young if sib_birth_order == birth_order + 1
        generate sib_bw_young = sib_bw if younger_sib == 1
			}
			foreach var of varlist dad_* *_dad_* f_* {
				if "`var'" ~= "dad_missing" replace `var' = . if dad_missing == 1
			}
		}
		
		quietly {
			if inlist("`kid'", "sib", "fc_all", "fc") {
				generate `cpi' = .
				forval yr = $startyr / $endyr {
					replace `cpi' = ${cpi_`yr'} if birthyear == `yr'
				}
				generate real_m_income = m_income / (`cpi' * 10)
				generate real_f_income = f_income / (`cpi' * 10)
				egen real_total_income = rowtotal(real_m_income real_f_income), missing
				drop `cpi'
			}
		}
		
		* Run the regressions
		
    local a `"mtitles replace prehead("sep=;")"'
    foreach var of varlist ``kid'_controls' ``kid'_add_controls' {
      eststo clear
      foreach controls in nc /* c */ {
        foreach ga in m32 l32 all {
        
          * Estimate local linear models
          
          eststo, title(`ga'): rdrobust2 `var' bw if ``ga'' & ${`kid'_cond}, c(1500) h($bw) covs(``controls'') stats
          estadd local has_controls = cond("`controls'" == "c", "Yes", "No")
        }
      }
      esttab using "$resdir/`kid'_covtests.csv", scsv keep(RD_Estimate) coeflabels(RD_Estimate `var') ///
        cells("b(fmt(3) star pvalue(pv_robust)) tau(par([ ]) fmt(3)) se(par fmt(3)) mean_dep(fmt(3)) Nobs(fmt(%9.0fc))") ///
          star(* 0.10 ** 0.05 *** 0.01) ///
          nogaps noobs nonotes compress `a'
      local a "append nomtitles nonumbers nodepvars nolines collabels(none)"
    }
  }
	
	log close

end


**********************
**** Preliminary analyses: Selected covariate tests with adjustment for multiple inference
**********************

program define prelim_covtests_multinf
  set more off
  
	log using "$logdir\prelim_covtests_multinf.log", text replace
		
	local m32 "ga >= 32"
	local l32 "ga < 32"
	
	local nc "i.heap50_all"
	local controls "nc"

	local controls_fc: display "male birth_order multiple ga family_size younger_sibs has_younger_sibs mom_age " ///
	  "mom_elen_impute mom_immigrant married_birth dad_age dad_elen_impute dad_immigrant dad_missing"
 	local controls_sib "sib_male sib_birth_order sib_multiple sib_ga sib_bw sib_vlbw agediff_old agediff_young"

  
  local a `"mtitles replace prehead("sep=;")"'
  foreach ga in m32 l32 {
  	
 		* Run the regressions for individual variables
    
    local controls_gen "younger_sibs has_younger_sibs sib_vlbw"
    local controls_exist: list controls_fc | controls_sib
    local controls_exist: list controls_exist - controls_gen
  	use `controls_exist' younger_sib pnr bw ga birthyear sib_birthyear heap50_all dead1y fc_tag using "$datadir/$data_restat" ///
  	  if inrange(bw, $ll, $ul) & inrange(birthyear, $startyr, $endyr), clear
	  capture matrix drop pvals
	  local est_list ""
	  
	  * Controls in the sample of focal children
	  
	  gegen younger_sibs = total(younger_sib), by(pnr)
	  gegen has_younger_sibs = max(younger_sibs > 0), by(pnr)
	  generate sib_vlbw = (sib_bw < 1500)
  	foreach var in `controls_fc' {
   		eststo var_`var', title(`var'): rdrobust2 `var' bw if ``ga'' & $fc_cond, c(1500) h($bw) covs(``controls'')
  		matrix pvals = nullmat(pvals) \ e(pv_rb)
  		local est_list: display "`est_list' var_`var'"
  	}
  	
	  * Controls in the sample of siblings
	  
	  capture generate sib_vlbw = sib_bw < 1500
  	foreach var in `controls_sib' {
   		eststo var_`var', title(`var'): rdrobust2 `var' bw if ``ga'' & $sib_cond, c(1500) h($bw) covs(``controls'')
  		matrix pvals = nullmat(pvals) \ e(pv_rb)
  		local est_list: display "`est_list' var_`var'"
  	}
  	
  	
  	* Add sharp q values for multiple inference correction
  	
  	bky_sharpq pvals, estimates(`est_list')
  	
  	esttab idx_* var_* using "$resdir/prelim_covtests_multinf.csv", scsv keep(RD_Estimate) coeflabels(RD_Estimate `ga') ///
  		cells(b(fmt(3) star pvalue(pv_robust)) tau(par([ ]) fmt(3)) se(par fmt(3)) bky_q(fmt(3) par({ }))) ///
  		star(* 0.10 ** 0.05 *** 0.01) nogaps noobs nonotes compress ///
  		scalars("mean_right Mean outcome" "N Observations") sfmt(%6.3f %6.0fc) `a'
    local a "append nomtitles nonumbers nodepvars nolines collabels(none)"
  }
	
	log close

end


**********************
**** Preliminary analyses: Graphs of indices and controls
**********************

program define prelim_figures_index
	set more off
	
	log using "$logdir\prelim_figures_index.log", text replace
		
	local m32 "ga >= 32"
	local l32 "ga < 32"
	
	local nc "i.heap50_all"
	local controls "nc"
  
	local ll = 1500 - $graph_bw
	local ul = 1500 + $graph_bw + 1
	
  use "$datadir/$data_restat" if inrange(bw, `ll', `ul') & inrange(birthyear, $startyr, $endyr), clear
  
  generate m32 = (`m32')
  egen bin = cut(bw), at(`ll'($graph_agg)1499 1501($graph_agg)`ul')
	drop if bw == 1500 | bin == .
  
  * List of variables: indices and predicted indices
  
  local var_list ""
  foreach pers in $index_list {
    foreach idx in ${index_list_`pers'} {
      local var_list: display "`var_list' idx_`pers'_`idx' pred_idx_`pers'_`idx'"
      replace idx_`pers'_`idx' = . if !(${`pers'_cond})
      replace pred_idx_`pers'_`idx' = . if !(${`pers'_cond})
    }
  }
  
  * List of variables: controls in the sample of focal children
	  
	local control_list: display "mom_age mom_elen_impute mom_immigrant married_birth dad_age dad_elen_impute " ///
	  "dad_immigrant dad_missing male birth_order multiple ga"
	foreach var in `control_list' {
	  local var_list: display "`var_list' `var'"
 		replace `var' = . if !($fc_cond)
	}
	
  * List of variables: controls in the sample of siblings
  
  capture generate sib_vlbw = sib_bw < 1500
	local control_list "sib_bw sib_male sib_birth_order sib_ga sib_multiple sib_vlbw agediff_old agediff_young"
	foreach var in `control_list' {
	  local var_list: display "`var_list' `var'"
 		replace `var' = . if !($sib_cond)
	}
  	  
	gcollapse (mean) `var_list' (count) nobs = bw, by(bin m32)
	
	export delimited "$resdir/prelim_figures_index_graph.csv", replace
	
	* Now draw the figures -- because of the large number of figures, this part was run on a local computer, not on the server
	
  insheet using "$resdir/prelim_figures_index_graph.csv", comma names clear
	foreach ga in m32 l32 {
		foreach var of varlist _all {
			if !inlist("`var'", "bin", "m32", "nobs") & substr("`var'", 1, 5) != "pred_" {

				* Determine the range on the y-axis

				quietly sum `var' if ``ga''
				local rmin = r(min)
				local rmax = r(max)
				local range = `rmax' - `rmin'
				local rmid = `rmin' + `range' / 2
				
				local round = 10
				while `range' / `round' < 1 {
					local round = `round' / 10
				}
				local step = cond(trunc(`range'/`round') <= 1.5, `round', ///
					cond(trunc(`range'/`round') <= 3.5, `round' * 2.5, ///
					cond(trunc(`range'/`round') <= 6.5, `round' * 5,  `round' * 7.5)))
				local min = round(`rmin' - 2 * `step', `step')
				local max = round(`rmin' + 2 * `step', `step')
				if (`rmid' < `min' + 2 * `step') & (`min' - `step' >= 0) {
					local min = `min' - `step'
					local max = `max' - `step'
				}
				else if `rmid' > `min' + 3 * `step' {
					local min = `min' + `step'
					local max = `max' + `step'
				}
				if `rmin' >= 0 local min = max(`min', 0)
				
				* Plot the figures

				twoway (scatter `var' bin, msymbol(o) mlcolor(black) mfcolor(white) ) ///
					(lfit `var' bin if bin < 1500, lwidth(medium) lpattern(solid) lcolor(black)) ///
					(lfit `var' bin if bin >= 1500, lwidth(medium) lpattern(solid) lcolor(black)) ///
					if bin ~= 1500 & ``ga'', xline(1500, lcolor(black) lpattern(dot)) ///
					ytitle("Standard deviations", size(small)) ylabel(`min'(`step')`max', labsize(vsmall) nogrid) ///
					xtitle("Birth weight (grams)", size(small)) xlabel(1300 (50) 1700, labsize(vsmall) nogrid) ///
					graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white)) legend(off)
				graph export "$figdir/`ga'_`var'.png", replace width(2048)
			}
		}
	}
  
  log close
			
end


**********************
**** Additional preliminary analyses: Comparison of sample of all focal children vs focal children with siblings
**********************

program define prelim_sumstat_fc
	set more off
	
	log using "$logdir\prelim_sumstat_fc.log", text replace
	
	tempfile fc
	
	local add_controls "apgar dad_missing dad_age dad_elen_impute f_education_mis dad_immigrant m_employed m_employed_mis"
	local add_controls "`add_controls' f_employed f_employed_mis m_income f_income total_income *took_* *_before family_size"
	local pred_outcomes "pred_*"
	
	local fc_all_controls: subinstr global fc_all_controls "i." "", all
	local controls: subinstr global controls "i." "", all
	
	use `fc_all_controls' `pred_outcomes' $fc_outcomes `add_controls' bw fc_tag using "$datadir/$fc_data" ///
		if inrange(bw, $ll, $ul) & inrange(birthyear, $startyr, $endyr) & fc_tag == 1, clear
	drop pred_sib* sib*
	save "`fc'", replace
	
	use `fc_all_controls' `pred_outcomes' $fc_outcomes `add_controls' bw using "$datadir/$fc_all_data" ///
		if inrange(bw, $ll, $ul) & inrange(birthyear, $startyr, $endyr), clear
	generate fc_tag = 0
	append using "`fc'"
	
	foreach var of varlist dad_* *_dad_* f_* {
		if "`var'" ~= "dad_missing" replace `var' = . if dad_missing == 1
	}
	
	eststo clear
	estpost ttest `fc_all_controls' `pred_outcomes' $fc_outcomes `add_controls', by(fc_tag) unequal
	esttab using "$resdir/fc_all_fcsib_stats.csv", scsv replace prehead("sep=;") star(* 0.10 ** 0.05 *** 0.01) ///
		cells((mu_1(fmt(3)) N_1(fmt(0)) mu_2(fmt(3)) N_2(fmt(0)) t(star abs fmt(3)) p(fmt(3)) _star))
	log close

end


**********************
**** Additional preliminary analyses: comparison of sample of focal children within bandwidth vs all children born 
**** during this period
**********************

program define prelim_sumstat_bw
	set more off
	
	log using "$logdir\prelim_sumstat_bw.log", text replace
	
	tempfile fc
	
	local add_controls "apgar dad_missing dad_age dad_elen_impute f_education_mis dad_immigrant m_employed m_employed_mis"
	local add_controls "`add_controls' f_employed f_employed_mis m_income f_income total_income *took_* *_before family_size"
	local pred_outcomes "pred_*"
	
	local fc_all_controls: subinstr global fc_all_controls "i." "", all
	local controls: subinstr global controls "i." "", all
	
	* Compare all focal children to all children born during this period
	
	use `fc_all_controls' `pred_outcomes' $fc_outcomes `add_controls' bw using "$datadir/$fc_all_data" ///
		if inrange(bw, $ll, $ul) & inrange(birthyear, $startyr, $endyr), clear
	generate fc_all_bw = 1
	save "`fc'", replace
	
	use `fc_all_controls' `pred_outcomes' $fc_outcomes `add_controls' bw using "$datadir/$fc_all_data" ///
		if inrange(birthyear, $startyr, $endyr), clear
	generate fc_all_bw = 0
	append using "`fc'"
	
	foreach var of varlist dad_* *_dad_* f_* {
		if "`var'" ~= "dad_missing" replace `var' = . if dad_missing == 1
	}
	
	eststo clear
	estpost ttest `fc_all_controls' `pred_outcomes' $fc_outcomes `add_controls', by(fc_all_bw) unequal
	esttab using "$resdir/all_vs_bw_fc_all_stats.csv", scsv replace prehead("sep=;") star(* 0.10 ** 0.05 *** 0.01) ///
		cells((mu_1(fmt(3)) N_1(fmt(0)) mu_2(fmt(3)) N_2(fmt(0)) t(star abs fmt(3)) p(fmt(3)) _star))
	
	* Compare focal children with siblings to all children born during this period
	
	use `fc_all_controls' `pred_outcomes' $fc_outcomes `add_controls' bw fc_tag using "$datadir/$fc_data" ///
		if inrange(bw, $ll, $ul) & inrange(birthyear, $startyr, $endyr) & fc_tag == 1, clear
	drop pred_sib* sib*
	save "`fc'", replace
	
	use `fc_all_controls' `pred_outcomes' $fc_outcomes `add_controls' bw using "$datadir/$fc_all_data" ///
		if inrange(birthyear, $startyr, $endyr), clear
	generate fc_tag = 0
	append using "`fc'"
	
	foreach var of varlist dad_* *_dad_* f_* {
		if "`var'" ~= "dad_missing" replace `var' = . if dad_missing == 1
	}
	
	eststo clear
	estpost ttest `fc_all_controls' `pred_outcomes' $fc_outcomes `add_controls', by(fc_tag) unequal
	esttab using "$resdir/all_vs_bw_fc_stats.csv", scsv replace prehead("sep=;") star(* 0.10 ** 0.05 *** 0.01) ///
		cells((mu_1(fmt(3)) N_1(fmt(0)) mu_2(fmt(3)) N_2(fmt(0)) t(star abs fmt(3)) p(fmt(3)) _star))
	
	log close

end



*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*
*  III. RESULTS
*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*

**********************
**** Results: Baseline, individual outcome variables, children
**********************

program define results_baseline_children
	set more off
	
	log using "$logdir\results_baseline_children.log", text replace
		
	local all "1 == 1"
	local m32 "ga >= 32"
	local l32 "ga < 32"
	
	foreach kid in sib fc /* fc_all */ {
		use "$datadir/${`kid'_data}" if inrange(bw, $ll, $ul) & inrange(birthyear, $startyr, $endyr), clear
		
		* Generate the lists of controls
		
		local nc "i.heap50_all"
		local c "i.heap50_all ${`kid'_controls}"
		
		* Run the regressions
		
    local a `"mtitles replace prehead("sep=;")"'
    local a `"`a' scalars("mean_right Mean outcome" "N Observations" "has_controls Controls")"'
    foreach var of varlist ${`kid'_outcomes} {
      eststo clear
      foreach controls in nc c {
        foreach ga in m32 l32 all {
        
          * Estimate local linear models
          
          eststo, title(`ga'): rdrobust2 `var' bw if ``ga'' & ${`kid'_cond}, c(1500) h($bw) covs(``controls'')
          estadd local has_controls = cond("`controls'" == "c", "Yes", "No")
        }
      }
      esttab using "$resdir/`kid'_baseline.csv", scsv keep(RD_Estimate) coeflabels(RD_Estimate `var') ///
        cells(b(fmt(3) star pvalue(pv_robust)) tau(par([ ]) fmt(3)) se(par fmt(3))) star(* 0.10 ** 0.05 *** 0.01) ///
        nogaps noobs nonotes compress sfmt(%6.3f %6.0fc %f) `a'
      local a "append nomtitles nonumbers nodepvars nolines collabels(none)"
      local a `"`a' scalars("mean_right Mean outcome" "N Observations")"'
    }
	}
	
	log close
	
end


**********************
**** Results: Baseline, individual outcome variables, parents
**********************

program define results_baseline_parents
	set more off

	log using "$logdir\results_baseline_parents.log", text replace
		
	local all "1 == 1"
	local m32 "ga >= 32"
	local l32 "ga < 32"
	tempvar divorce cpi
	
  use "$datadir/$fc_data" if inrange(birthyear, $startyr, $endyr) & inrange(bw, $ll, $ul), clear
  
  * Generate the lists of controls
  
  local nc "i.heap50_all"
  local c "i.heap50_all $fc_controls"
  
  * Generate real income (mother, father, family) X years after birth
  
  forval i = 0 / 10 {
    generate `cpi' = .
    forval yr = $startyr / $endyr {
      local yr_ab = `yr' + `i'
      replace `cpi' = ${cpi_`yr_ab'} if birthyear == `yr'
    }
    generate real_m_income`i' = m_income`i' / (`cpi' * 10)
    generate real_f_income`i' = f_income`i' / (`cpi' * 10)
    egen real_total_income`i' = rowtotal(real_m_income`i' real_f_income`i'), missing
    drop `cpi'
  }
  
  foreach mec in real_m_income real_f_income real_total_income m_intsup f_intsup {
    egen c`mec'_05y = rowmean(`mec'0 `mec'1 `mec'2 `mec'3 `mec'4 `mec'5)
    egen c`mec'_610y = rowmean(`mec'6 `mec'7 `mec'8 `mec'9 `mec'10)
    egen c`mec'_010y = rowmean(`mec'0 `mec'1 `mec'2 `mec'3 `mec'4 `mec'5 `mec'6 `mec'7 `mec'8 `mec'9 `mec'10)
  }
  
  foreach mec in real_m_income real_f_income real_total_income {
    generate ln_c`mec'_05y = log(c`mec'_05y)
    generate ln_c`mec'_610y = log(c`mec'_610y)
    generate ln_c`mec'_010y = log(c`mec'_010y)
    replace ln_c`mec'_05y = 0 if c`mec'_05y == 0
    replace ln_c`mec'_610y = 0 if c`mec'_610y == 0
    replace ln_c`mec'_010y = 0 if c`mec'_010y == 0
  }
  
  foreach mec in m_antidep f_antidep {
    egen c`mec'_25y = rowmax(`mec'2 `mec'3 `mec'4 `mec'5)
    egen c`mec'_610y = rowmax(`mec'6 `mec'7 `mec'8 `mec'9 `mec'10)
    egen c`mec'_1115y = rowmax(`mec'11 `mec'12 `mec'13 `mec'14 `mec'15)
  }
  
  * Restrict divorce information to families where dad is present at birth and to first divorce
  
  forval i = 0 / 10 {
    replace divorce_year`i' = . if dad_missing == 1
    local dlist ""
    forval j = 0 / `=`i'-1' {
      local dlist "`dlist' divorce_year`j'"
    }
    if "`dlist'" ~= "" {
      egen `divorce' = rowtotal(`dlist'), missing
      replace divorce_year`i' = 0 if `divorce' == 1
      drop `divorce'
    }
  }
  
  foreach mec in m_extsup f_extsup divorce_year {
    egen c`mec'_05y = rowmax(`mec'0 `mec'1 `mec'2 `mec'3 `mec'4 `mec'5)
    egen c`mec'_610y = rowmax(`mec'6 `mec'7 `mec'8 `mec'9 `mec'10)
    egen c`mec'_010y = rowmax(c`mec'_05y c`mec'_610y)
  }
  
  * Run the regressions
  
  foreach par in mom dad {
    local a `"mtitles replace prehead("sep=;")"'
    local a `"`a' scalars("mean_right Mean outcome" "N Observations" "has_controls Controls")"'

    foreach var of varlist ${`par'_outcomes} {
      eststo clear
      foreach controls in nc /* c */ {
        foreach ga in m32 l32 /* all */ {
          
          * Estimate the regressions for all focal children 
          
          eststo, title(`ga'): rdrobust2 `var' bw if ``ga'' & $fc_cond, c(1500) h($bw) covs(``controls'')
          estadd local has_controls = cond("`controls'" == "c", "Yes", "No")
          
          * Estimate the regressions for surviving focal children 
          
          eststo, title(`ga'-surv): rdrobust2 `var' bw if ``ga'' & $fc_cond & dead1y == 0, ///
            c(1500) h($bw) covs(``controls'')
          estadd local has_controls = cond("`controls'" == "c", "Yes", "No")
        }
      }
      esttab using "$resdir/`par'_baseline.csv", scsv keep(RD_Estimate) coeflabels(RD_Estimate `var') ///
        cells(b(fmt(3) star pvalue(pv_robust)) tau(par([ ]) fmt(3)) se(par fmt(3))) star(* 0.10 ** 0.05 *** 0.01) ///
        nogaps noobs nonotes compress sfmt(%6.3f %6.0fc %f) `a'
      local a "append nomtitles nonumbers nodepvars nolines collabels(none)"
      local a `"`a' scalars("mean_right Mean outcome" "N Observations")"'
    }
  }
	
	log close
	
end


**********************
**** Results: Baseline, indices, with adjustment for multiple inference
**********************

program define results_baseline_index
	set more off
	
	log using "$logdir\results_baseline_index.log", text replace
	
  local m32 "ga >= 32"
	local l32 "ga < 32"
	
	local nc "i.heap50_all"
	local controls "nc"
  
  local a `"mtitles replace prehead("sep=;")"'
  foreach ga in m32 l32 {
  	use "$datadir/$data_restat" if inrange(bw, $ll, $ul) & inrange(birthyear, $startyr, $endyr), clear
	  eststo clear
	  capture matrix drop pvals
	  local est_list ""
	  
    foreach pers in $index_list {
      foreach idx in ${index_list_`pers'} {
    		
    		* Run the regressions for the index
    		
    		eststo idx_`pers'_`idx', title(idx_`pers'_`idx'): rdrobust2 idx_`pers'_`idx' bw if ``ga'' & ${`pers'_cond}, ///
    		  c(1500) h($bw) covs(``controls'')
    		matrix pvals = nullmat(pvals) \ e(pv_rb)
    		local est_list: display "`est_list' idx_`pers'_`idx'"
    	}
    	
    	* Run the regressions for individual variables or alternative aggregations
    	
  		foreach var of varlist ${`pers'_outcomes} {
  			eststo var_`var', title(`var'): rdrobust2 `var' bw if ``ga'' & ${`pers'_cond}, c(1500) h($bw) covs(``controls'')
  		}
  	}
  	
  	* Add sharp q values for multiple inference correction
  	
  	bky_sharpq pvals, estimates(`est_list')
  	
  	esttab idx_* var_* using "$resdir/baseline_index.csv", scsv keep(RD_Estimate) coeflabels(RD_Estimate `ga') ///
  		cells(b(fmt(3) star pvalue(pv_robust)) tau(par([ ]) fmt(3)) se(par fmt(3)) bky_q(fmt(3) par({ }))) ///
  		star(* 0.10 ** 0.05 *** 0.01) nogaps noobs nonotes compress ///
  		scalars("mean_right Mean outcome" "N Observations") sfmt(%6.3f %6.0fc) `a'
    local a "append nomtitles nonumbers nodepvars nolines collabels(none)"
  }
	
	log close

end


**********************
**** Results: Comparison gaps
**********************

program define results_gaps
	set more off
	tempvar cpi
	
	log using "$logdir\results_gaps.log", text replace
	
	* Get the range of years of birth for siblings
	
	quietly {
		use "$datadir/$sib_data" if inrange(bw, $ll, $ul) & inrange(birthyear, $startyr, $endyr), clear
		sum sib_birthyear
		local sib_ymin = r(min)
		local sib_ymax = r(max)
	}
	
	* Calculate several gaps in test scores
	
	quietly {
		use "$datadir/$fc_all_data" if inrange(birthyear, `sib_ymin', `sib_ymax')
		sum birthyear if dansk_m_prove1 ~= . | mat_s_prove1  ~= .
		local ymin = r(min)
		local ymax = r(max)
		
		foreach p in 10 90 {
			egen total_income_p`p' = pctile(total_income), p(`p') by(birthyear)
		}
	}
	
	local ls = c(linesize)
	set linesize 150
	
	foreach tscore in mat_s_prove1 dansk_m_prove1 {
		display _newline(2) "Gaps in `tscore':"
		display "=======================" _newline
		
		* Income
		
		quietly sum `tscore' if total_income <= total_income_p10
		local mean1 = r(mean)
		display "Average score for: " _continue
		display _col(25) "10% income = " %5.3f r(mean) " (" %7.0fc r(N) " observations);" _continue
		quietly sum `tscore' if total_income >= total_income_p90 & total_income ~= .
		local mean2 = r(mean)
		display _col(70) " 90% income = " %5.3f r(mean) " (" %7.0fc r(N) " observations);" _continue
		display _col(125) " gap = " %5.3f abs(`mean2' - `mean1')
		
		* Gender
		
		quietly sum `tscore' if male == 0
		local mean1 = r(mean)
		display "Average score for: " _continue
		display _col(25) "girls = " %5.3f r(mean) " (" %7.0fc r(N) " observations);" _continue
		quietly sum `tscore' if male == 1
		local mean2 = r(mean)
		display _col(70) " boys = " %5.3f r(mean) " (" %7.0fc r(N) " observations);" _continue
		display _col(125) " gap = " %5.3f abs(`mean2' - `mean1')
		
		* Immigration status
		
		quietly sum `tscore' if mom_immigrant == 1
		local mean1 = r(mean)
		display "Average score for: " _continue
		display _col(25) "immigrants = " %5.3f r(mean) " (" %7.0fc r(N) " observations);" _continue
		quietly sum `tscore' if mom_immigrant == 0
		local mean2 = r(mean)
		display _col(70) " non-immigrants = " %5.3f r(mean) " (" %7.0fc r(N) " observations);" _continue
		display _col(125) " gap = " %5.3f abs(`mean2' - `mean1') _newline(2)
	}
	
	display "=======================" _newline
	display "Correlations between test scores and parental education and income"
	
	foreach var in m_income f_income {
		quietly generate `var'_mil = `var' / 1e6
	}
	foreach tscore in mat_s_prove1 dansk_m_prove1 {
		regress `tscore' mom_elen_impute m_education_mis dad_elen_impute f_education_mis
		regress `tscore' mom_elen_impute m_education_mis dad_elen_impute f_education_mis m_income_mil f_income_mil
	}
	
	set linesize `ls'
	log close
	
end


**********************
**** Robustness checks: bandwidth and polynomial degree
**********************

program define robust_bw_poly
	set more off
	tempvar wgt
	tempname coefs mat_bw mat_var
	
	local m32 "ga >= 32"
	local ga "m32"
	local nc "i.heap50_all"
	local controls "nc"
	
	local bwl = floor($bw * 0.5 / 50) * 50
	local bwu = ceil($bw * 1.5 / 50) * 50
	
	log using "$logdir\ec3_robust_bw_poly.log", text replace
	
	use "$datadir/$data_restat" if inrange(bw, 1500 - `bwu', 1500 + `bwu') & inrange(birthyear, $startyr, $endyr), clear
	
	* Run the regressions
	
	capture matrix drop `coefs'
	local cn ""
	
  local a `"mtitles replace prehead("sep=;")"'
  
  foreach pers in $index_list {
    foreach idx in ${sig_index_list_`pers'} ${insig_index_list_`pers'} {
  		
  		* Run the regressions for the index (but not for the variables making up the index because we already have most 
  		* of them already)
  		
  		foreach var of varlist idx_`pers'_`idx' /* ${`pers'_`idx'} */ {
        eststo clear
				local cn: display "`cn' `var'_p1 `var'_p1_l `var'_p1_r `var'_p2 `var'_p2_l `var'_p2_r"
				
				local rn ""
				capture matrix drop `mat_var'
				
				foreach bw of numlist `bwl'(10)`bwu' {
					local rn: di "`rn' `bw'"
					capture matrix drop `mat_bw'
					
					forval pd = 1 / 2 {
      			eststo, title(`bw'-`pd'): rdrobust2 `var' bw if ``ga'' & ${`pers'_cond}, ///
      			  c(1500) h(`bw') covs(``controls'') p(`pd')
						matrix `mat_bw' = nullmat(`mat_bw') , (_b[RD_Estimate], -e(ci_r_rb), -e(ci_l_rb))
					}
					matrix `mat_var' = nullmat(`mat_var') \ `mat_bw'
				}
				matrix `coefs' = nullmat(`coefs'), `mat_var'
				
    		esttab using "$resdir/robust_bw_poly.csv", scsv keep(RD_Estimate) coeflabels(RD_Estimate `var') ///
    			cells(b(fmt(3) star pvalue(pv_robust)) tau(par([ ]) fmt(3)) se(par fmt(3))) star(* 0.10 ** 0.05 *** 0.01) ///
    			nogaps noobs nonotes compress scalars("mean_right Mean outcome" "N Observations") sfmt(%6.3f %6.0fc) `a'
    		local a "append nomtitles nonumbers nodepvars nolines collabels(none)"
  		}
  	}
	}
	matrix rownames `coefs' = `rn'
	matrix colnames `coefs' = `cn'
	* For some reason, esttab gives errors with some matrices -- then we need to output them "by hand"
	capture noisily esttab matrix(`coefs') using "$resdir/robust_bw_poly_graph.csv", csv replace nomtitles
	if _rc {
		file open bw_poly using "$resdir/robust_bw_poly_graph.csv", write text replace
		local cn: subinstr local cn " " ",", all
		file write bw_poly "`cn'" _newline
		local nrows = rowsof(`coefs')
		local ncols = colsof(`coefs')
		forval index_i = 1 / `nrows' {
			local cr: word `index_i' of `rn'
			file write bw_poly "`cr'," 
			forval index_j = 1 / `ncols' {
				file write bw_poly (`coefs'[`index_i', `index_j']) ","
			}
			file write bw_poly _newline
		}
		file close bw_poly
	}
	
	log close
	
	* Now draw the figures -- because of the large number of figures, this part was run on a local computer, not on the server

	insheet using "$resdir/robust_bw_poly_graph.csv", comma names clear
	rename v1 bw

	foreach var of varlist *_p1 {
		local lim = 0
		foreach yvar of varlist `var'* {
			quietly summarize `yvar' if inrange(bw, `bwl', `bwu')
			local lim = max(`lim', abs(r(max)), abs(r(min)))
		}
		if ceil((2 * `lim') / 0.25) <= 5 {
			local lim = ceil(`lim' / 0.25) * 0.25
			local step = 0.25
		}
		else {
			local lim = ceil(`lim' / 0.5) * 0.5
			local step = 0.5
		}

		twoway (connected `var' bw if bw == $bw, mcolor(black) msize(medium) msymbol(square)) ///
			(connected `var' bw, mcolor(black) msize(small) lcolor(black) lwidth(medium) lpattern(solid)) ///
			(rcap `var'_l `var'_r bw, lcolor(black) lwidth(thin) lpattern(solid)) if inrange(bw, `bwl', `bwu'), ///
			legend(off) ///
			ytitle("Effect size", size(small)) ylabel(-`lim' (`step') `lim', format(%4.2f) labsize(vsmall) nogrid) ///
			yline(0, lcolor(black) lwidth(thin) lpattern(solid)) ///
			xtitle("Bandwidth (grams)", size(small)) xlabel(`bwl' (50) `bwu', format(%5.0fc) labsize(vsmall) nogrid) ///
			graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white))
		graph export "$figdir/robust_bw_`var'.png", replace width(2048)
	}

end


**********************
**** Robustness checks: additional tests
**********************

program define robust_additional
	set more off
	tempvar wgt wgt2
	tempname max
	log using "$logdir\robust_additional.log", text replace
	
	local m32 "ga >= 32"
	local l32 "ga < 32"
	local ga "m32"
	
	local nc "i.heap50_all"
	local c "i.heap50_all ${`kid'_controls}"
	local controls "nc"
  
	* Load the optimal bandwidths calculated above
	
  display "Import optimal bandwidths:"
  foreach pers in $index_list {
  	quietly use "$datadir/$data_restat" in 1, clear
		local `pers'_maxbw = 0
		local vlist ""
		foreach idx in ${sig_index_list_`pers'} ${insig_index_list_`pers'} {
      foreach var of varlist idx_`pers'_`idx' {
        local vlist: display "`vlist' `var'"
      }
    }
    foreach idx in ${sig_index_list_`pers'} ${insig_index_list_`pers'} {
      display " - idx_`pers'_`idx'"
      quietly {
  			tempname `pers'_bwlist
  			insheet using "$resdir/Saved_Restat/optimal_bw_`pers'.csv", comma names clear
  			keep if inlist(v1, "MSE_h", "MSE_b")
  			keep v1 `vlist'
  			unab vlist: _all
  			local vlist = subinstr("`vlist'", "v1", "", .)
  			mkmat `vlist', matrix(``pers'_bwlist') rownames(v1)
  			mata: st_matrix("`max'", max(st_matrix("``pers'_bwlist'")))
  			local `pers'_maxbw = max(``pers'_maxbw', el(`max', 1, 1))
      }
		}
	}
	
	* Run the regressions
	
	local a `"mtitles replace prehead("sep=;")"'
	local a `"`a' scalars("mean_right Mean outcome" "N Observations" "has_controls Controls")"'
  foreach pers in $index_list {
		use "$datadir/$data_restat" if inrange(birthyear, $startyr, $endyr) ///
		  & inrange(bw, 1500 - ``pers'_maxbw', 1500 + ``pers'_maxbw'), clear
		
		encode pnrm, gen(pnrm_numeric)
		
		* Generate the lists of controls
		
		local c "i.heap50_all ${`pers'_controls}"
    foreach idx in  ${sig_index_list_`pers'} ${insig_index_list_`pers'} {
      foreach var of varlist idx_`pers'_`idx' /* ${`pers'_`idx'} */ {
  		  eststo clear
				
				* Rectangular kernel
								
				local sample_cond "``ga'' & ${`pers'_cond}"
				local var_list "``controls''"
				eststo, title(rectangular): rdrobust2 `var' bw if `sample_cond', ///
					c(1500) h($bw) covs(`var_list') kernel(uniform)
				estadd local has_controls = cond("`controls'" == "c", "Yes", "No")
				
				* Donut excluding 1,500g
				
				local sample_cond "``ga'' & ${`pers'_cond} & bw ~= 1500"
				local var_list "``controls''"
				eststo, title(donut1500): rdrobust2 `var' bw if `sample_cond', c(1500) h($bw) covs(`var_list')
				estadd local has_controls = cond("`controls'" == "c", "Yes", "No")
				
				* Donut excluding 1,490-1,510g
				
				local sample_cond "``ga'' & ${`pers'_cond} & ~inrange(bw, 1490, 1510)"
				local var_list "``controls''"
				eststo, title(donut10g): rdrobust2 `var' bw if `sample_cond', c(1500) h($bw) covs(`var_list')
				estadd local has_controls = cond("`controls'" == "c", "Yes", "No")
				
				* Including controls
				
				local sample_cond "``ga'' & ${`pers'_cond}"
				local var_list "`c'"
				eststo, title(controls): rdrobust2 `var' bw if `sample_cond', c(1500) h($bw) covs(`var_list')
				estadd local has_controls = cond("`controls'" == "c", "Yes", "No")
				
				* Including controls, only observations with non-missing maternal education
				
				local sample_cond "``ga'' & ${`pers'_cond} & (m_education_mis == 0)"
				local var_list "`c'"
				eststo, title(controls-no_mom_educ_miss): rdrobust2 `var' bw if `sample_cond', c(1500) h($bw) covs(`var_list')
				estadd local has_controls = cond("`controls'" == "c", "Yes", "No")
				
				* Including controls, but not the indicator for missing maternal education
				
				local sample_cond "``ga'' & ${`pers'_cond}"
				local var_list: subinstr local c "m_education_mis" "", all
				eststo, title(controls): rdrobust2 `var' bw if `sample_cond', c(1500) h($bw) covs(`var_list')
				estadd local has_controls = cond("`controls'" == "c", "Yes", "No")
				
				* Exclude focal multiple births
				
				local sample_cond "``ga'' & ${`pers'_cond} & multiple == 0"
				local var_list "``controls''"
				eststo, title(no_twins): rdrobust2 `var' bw if `sample_cond', c(1500) h($bw) covs(`var_list')
				estadd local has_controls = cond("`controls'" == "c", "Yes", "No")
				
				* Standard errors clustered at the gram level
				
				local sample_cond "``ga'' & ${`pers'_cond}"
				local var_list "``controls''"
				eststo, title(cluster_bw): rdrobust2 `var' bw if `sample_cond', ///
					c(1500) h($bw) covs(`var_list') vce(cluster bw)
				estadd local has_controls = cond("`controls'" == "c", "Yes", "No")
				
				* Using the CCT optimal bandwidths
				
				local opt_b = el(``pers'_bwlist', rownumb(``pers'_bwlist', "MSE_b"), colnumb(``pers'_bwlist', "`var'"))
				local opt_h = el(``pers'_bwlist', rownumb(``pers'_bwlist', "MSE_h"), colnumb(``pers'_bwlist', "`var'"))
				local sample_cond "``ga'' & ${`pers'_cond}"
				local var_list "``controls''"
				eststo, title(optimal_bw): rdrobust2 `var' bw if `sample_cond', ///
					c(1500) h(`opt_h') b(`opt_b') covs(`var_list')
				estadd local has_controls = cond("`controls'" == "c", "Yes", "No")
				
				* No controls for heaping
				
				local sample_cond "``ga'' & ${`pers'_cond}"
				local var_list: subinstr local `controls' "i.heap50_all" ""
				eststo, title(no_heap): rdrobust2 `var' bw if `sample_cond', c(1500) h($bw) covs(`var_list')
				estadd local has_controls = cond("`controls'" == "c", "Yes", "No")
				
				* Only comparable GAs
				
				local sample_cond "``ga'' & ${`pers'_cond} & inrange(ga, 30, 33)"
				local var_list "``controls''"
				eststo, title(GA30-33): rdrobust2 `var' bw if `sample_cond', c(1500) h($bw) covs(`var_list')
				estadd local has_controls = cond("`controls'" == "c", "Yes", "No")
				
				if "`pers'" != "fc" {
				
					* Include only family members of surviving focal children
					
					local sample_cond "``ga'' & ${`pers'_cond} & dead1y == 0"
					local var_list "``controls''"
					eststo, title(fc_survive): rdrobust2 `var' bw if `sample_cond', c(1500) h($bw) covs(`var_list')
					estadd local has_controls = cond("`controls'" == "c", "Yes", "No")
					
					* Standard errors clustered at the family (mother) level
					
					tempvar mom_id
					destring pnrm, generate(`mom_id')
					local sample_cond "``ga'' & ${`pers'_cond}"
					local var_list "``controls''"
					eststo, title(cluster_mom): rdrobust2 `var' bw if `sample_cond', ///
						c(1500) h($bw) covs(`var_list') vce(cluster `mom_id')
					estadd local has_controls = cond("`controls'" == "c", "Yes", "No")
					drop `mom_id'
				}
				else {
				  eststo, title(fc_survive): emptycol
				  eststo, title(cluster_mom): emptycol
				}
    		esttab using "$resdir/robust_additional.csv", scsv keep(RD_Estimate) coeflabels(RD_Estimate `var') ///
    			cells(b(fmt(3) star pvalue(pv_robust)) tau(par([ ]) fmt(3)) se(par fmt(3))) star(* 0.10 ** 0.05 *** 0.01) ///
    			nogaps noobs nonotes compress sfmt(%6.3f %6.0fc %f) `a'
    		local a "append nomtitles nonumbers nodepvars nolines collabels(none)"
    		local a `"`a' scalars("mean_right Mean outcome" "N Observations")"'
			}
		}
	}
	
	log close
		
end


**********************
**** Robustness checks: sensitivity to the cutoff
**********************

program define robust_cutoffs
	set more off
	
	tempname matc coefs
	
	log using "$logdir\robust_cutoffs.log", text replace
	
	local cmin = 1100
	local cmax = 3100
	local bwmin = `cmin' - $bw
	local bwmax = `cmax' + $bw
	
	local m32 "ga >= 32"
	local ga "m32"
	local nc "i.heap50_all"
	local controls "nc"
	
	use "$datadir/$data_restat" if inrange(bw, `bwmin', `bwmax') & inrange(birthyear, $startyr, $endyr), clear
	
	* Run the regressions
	
	capture matrix drop `coefs'
	local cn ""
	
  local a `"mtitles replace prehead("sep=;")"'
  foreach pers in $index_list {
    foreach idx in ${sig_index_list_`pers'} ${insig_index_list_`pers'} {
  		
  		* Run the regressions for the index and for the variables making up the index
  		
  		foreach var of varlist idx_`pers'_`idx' /* ${`pers'_`idx'} */ {
        eststo clear
				
				local cn: display "`cn' `var' `var'_l `var'_r"
				local rn ""
				capture matrix drop `matc'
				
				foreach cutoff of numlist `cmin'($bw)`cmax' {
					local rn: display "`rn' `cutoff'"
					
    			eststo, title(`cutoff'): rdrobust2 `var' bw if ``ga'' & ${`pers'_cond}, c(`cutoff') h($bw) covs(``controls'')
					matrix `matc' = nullmat(`matc') \ (_b[RD_Estimate], -e(ci_r_rb), -e(ci_l_rb))
				}
				matrix `coefs' = nullmat(`coefs') , `matc'
				
    		esttab using "$resdir/robust_cutoffs.csv", scsv keep(RD_Estimate) coeflabels(RD_Estimate `var') ///
    			cells(b(fmt(3) star pvalue(pv_robust)) tau(par([ ]) fmt(3)) se(par fmt(3))) star(* 0.10 ** 0.05 *** 0.01) ///
    			nogaps noobs nonotes compress scalars("mean_right Mean outcome" "N Observations") sfmt(%6.3f %6.0fc) `a'
  			local a "append nomtitles nonumbers nodepvars nolines collabels(none)"
			}
		}
	}
	matrix rownames `coefs' = `rn'
	matrix colnames `coefs' = `cn'
	capture noisily esttab matrix(`coefs') using "$resdir/robust_cutoffs_graph.csv", csv replace nomtitles
		
	log close
	
  * Now draw the figures -- because of the large number of figures, this part was run on a local computer, not on the server

  insheet using "$resdir/robust_cutoffs_graph.csv", comma names clear
	rename v1 cutoff

	foreach var of varlist _all {
		if "`var'" != "cutoff" & !inlist(substr("`var'", -2, 2), "_l", "_r") {
			local lim = 0
			foreach yvar of varlist `var'* {
				quietly summarize `yvar' if inrange(cutoff, `cmin', `cmax')
				local lim = max(`lim', abs(r(max)), abs(r(min)))
			}
			if trunc((2 * `lim') / 0.25) <= 5 {
				local lim = round(`lim', 0.25)
				local step = 0.25
			}
			else {
				local lim = round(`lim', 0.5)
				local step = 0.5
			}
			
		twoway (connected `var' cutoff if cutoff == 1500, mcolor(black) msize(medium) msymbol(square)) ///
			(connected `var' cutoff, mcolor(black) msize(small) lcolor(black) lwidth(medium) lpattern(solid)) ///
			(rcap `var'_l `var'_r cutoff, lcolor(black) lwidth(thin) lpattern(solid)) if inrange(cutoff, `cmin', `cmax'), ///
			legend(off) ///
			ytitle("Effect size", size(small)) ylabel(-`lim' (`step') `lim', format(%4.2f) labsize(vsmall) nogrid) ///
			yline(0, lcolor(black) lwidth(thin) lpattern(solid)) ///
			xtitle("Cutoff (grams)", size(small)) xlabel(`cmin' ($bw) `cmax', format(%5.0fc) labsize(vsmall) nogrid) ///
			graphregion(fcolor(white) lcolor(white) ifcolor(white) ilcolor(white))
			graph export "$figdir/robust_cutoffs_`var'.png", replace width(2048)
		}
	}

end


**********************
**** Mechanisms: Quantile regressions for test scores
**********************

program define mecs_quantile
  set more off
  
  log using "$logdir\mecs_quantile.log", text replace
    
	local m32 "ga >= 32"
	local l32 "ga < 32"
	
	local nc "i.heap50_all"
	local controls "nc"
  
  local lt_fc ""
  local lt_sib "Siblings of "
  local lt_mom "Mothers of "
  local lt_dad "Fathers of "
  
  * Generate list of quantiles
  
  local qlist ".20 .40 .60 .80"
  local qlabels ""
  local pctiles ""
  foreach q in `qlist' {
    local qlabels: display "`qlabels' p`=`q'*100'"
    local pctiles: display "`pctiles' `=`q'*100'"
  }
  
  local a `"mtitles replace prehead("sep=;")"'
  foreach ga in m32 {
  	use "$datadir/$data_restat" if inrange(birthyear, $startyr, $endyr), clear
	  eststo clear
	  
    foreach pers in fc sib {
      
      local lab1: display "`lt_`pers''VLBW focal children"
      local lab2: display "`lt_`pers''" cond("`lt_`pers''" == "", "N", "n") "on-VLBW focal children"
      
      foreach idx in test_scores {
    		
        capture noisily eststo, title(idx_`pers'_`idx'): rdqtese2 idx_`pers'_`idx' if ``ga'' & ${`pers'_cond}, ///
          quantiles(`qlist')
        local coefs: colnames e(b)
        local telabels ""
        forval j = 1 / `=wordcount("`coefs'")' {
          local telabels: display "`telabels' `=word("`coefs'", `j')' te_`=word("`qlabels'", `j')'"
        }
        
        local bwlist = e(bw_list)
        local bw_above 0
        forval b = 1 / 3 {
          local bw = cond(inlist(word("`bwlist'", `b'), "999", "-1"), "0", word("`bwlist'", `b'))
          local bw_above = max(`bw_above', `bw')
        }
        local bw_below 0
        forval b = 4 / 6 {
          local bw = cond(inlist(word("`bwlist'", `b'), "999", "-1"), "0", word("`bwlist'", `b'))
          local bw_below = max(`bw_below', `bw')
        }
        estadd scalar bw_above = `bw_above'
        estadd scalar bw_below = `bw_below'
        
        count if ``ga'' & ${`pers'_cond} & idx_`pers'_`idx' != . & inrange(bw, 1500 - `bw_below', 1500 + `bw_above')
        estadd scalar N = r(N), replace
        
        quietly _pctile idx_`pers'_`idx' if ``ga'' & ${`pers'_cond} & inrange(bw, 1500, 1500 + `bw_above'), ///
          percentiles(`pctiles')
        forval j = 1 / `=wordcount("`qlabels'")' {
          estadd scalar `=word("`qlabels'", `j')' = r(r`j')
        }
      }
    }
  }
  esttab using "$resdir/mecs_quantile.csv", scsv mtitles replace prehead("sep=;") b(3) se(3) ///
    star(* 0.10 ** 0.05 *** 0.01) nogaps noobs nonotes compress coeflabels(`telabels') ///
    scalars("N Observations" "converged Converged" "bw_below Bandwidth left" "bw_above Bandwidth right" `qlabels') ///
    sfmt(%6.0fc %f %6.3fc)
  
  log close
  
end


**********************
**** Mechanisms: Heterogeneity by focal child characteristics (gender, parity)
**********************

program define mecs_het
  set more off
  
	log using "$logdir\mecs_het.log", text replace
		
	local m32 "ga >= 32"
	local l32 "ga < 32"
	local ga "m32"
	
	local nc "i.heap50_all"
	local controls "nc"
  
  * Define the heterogeneity conditions
  
  local singleton "(multiple == 0)"
  local fc_boy "(male == 1)"
  	
	* Run the regressions
	
	use "$datadir/$data_restat" if inrange(bw, $ll, $ul) & inrange(birthyear, $startyr, $endyr) & ``ga'', clear
	
  local a `"mtitles replace prehead("sep=;")"'
  foreach pers in $index_list {
    foreach idx in ${sig_index_list_`pers'} {
      foreach var of varlist idx_`pers'_`idx' {
  		  eststo clear
				
				* Loop over the heterogeneity conditions
				
				foreach het_cond in singleton fc_boy {
				  foreach het_val in 0 1 {
    				eststo, title(`het_cond' = `het_val'): rdrobust2 `var' bw if ${`pers'_cond} & (``het_cond'' == `het_val'), ///
    		      c(1500) h($bw) covs(``controls'')
				  }
				}
								
    		esttab using "$resdir/mecs_het.csv", scsv keep(RD_Estimate) coeflabels(RD_Estimate `var') ///
    			cells(b(fmt(3) star pvalue(pv_robust)) tau(par([ ]) fmt(3)) se(par fmt(3))) star(* 0.10 ** 0.05 *** 0.01) ///
    			nogaps noobs nonotes compress scalars("mean_right Mean outcome" "N Observations") sfmt(%6.3f %6.0fc) `a'
    		local a "append nomtitles nonumbers nodepvars nolines collabels(none)"
			}
		}
	}
	
	log close
	
end


**********************
**** Mechanisms: Heterogeneity by initial sibling endowments
**********************

program define mecs_het_endowments
	set more off
	
	log using "$logdir\mecs_het_endowments.log", text replace
		
	local m32 "ga >= 32"
	
	* Get the median birth weight for all children born over the relevant range of years 
	
  use bw ga birthyear sib_birthyear sib_mat_s_prove1 sib_dansk_m_prove1 using "$datadir/$data_restat" ///
    if inrange(birthyear, $startyr, $endyr), clear
	gstats sum sib_birthyear if inrange(bw, $ll, $ul) & ga >= 32 & (sib_mat_s_prove1 ~= . | sib_dansk_m_prove1 ~= .), ///
	  nodetail
	local min_yr = r(min)
	local max_yr = r(max)
	
  use "$datadir/$data_restat" if inrange(birthyear, `min_yr', `max_yr'), clear
  gquantiles bw, _pctile nquantiles(2)
  local med_bw = r(r1)
  
  display "Med_bw: `med_bw'"
  
  * Generate list of subsamples
  
  local below "(sib_bw < `med_bw')"
  local above "(sib_bw >= `med_bw')"
  local all "(1 == 1)"
  
	* Generate the lists of controls

	local nc "i.heap50_all"
	local controls "nc"
  
  local a `"mtitles replace prehead("sep=;")"'
	use "$datadir/$data_restat" if inrange(bw, $ll, $ul) & inrange(birthyear, $startyr, $endyr), clear
	
	* Loop over variables
	
	foreach var of varlist idx_sib_test_scores $sib_test_scores {
    eststo clear
	  foreach smp in above below all {
	    eststo, title(`smp'): rdrobust2 `var' bw if `m32' & $sib_cond & ``smp'', c(1500) h($bw) covs(``controls'')
	  }
  	esttab using "$resdir/mecs_het_endowments.csv", scsv keep(RD_Estimate) coeflabels(RD_Estimate `var') ///
  		cells(b(fmt(3) star pvalue(pv_robust)) tau(par([ ]) fmt(3)) se(par fmt(3))) ///
  		star(* 0.10 ** 0.05 *** 0.01) nogaps noobs nonotes compress ///
  		scalars("mean_right Mean outcome" "N Observations") sfmt(%6.3f %6.0fc) `a'
    local a "append nomtitles nonumbers nodepvars nolines collabels(none)"
  }

	log close
	
end


**********************
**** Mechanisms: Some baseline regressions controlling for outcomes (non-causal)
**********************

program define mecs_control_outcomes
	set more off
	
	log using "$logdir\mecs_control_outcomes.log", text replace
		
	local m32 "ga >= 32"
	local l32 "ga < 32"
	
	local nc "i.heap50_all"
	local controls "nc"
  
  local a `"mtitles replace prehead("sep=;")"'
  foreach ga in m32 {
  	use "$datadir/$data_restat" if inrange(bw, $ll, $ul) & inrange(birthyear, $startyr, $endyr), clear
	  eststo clear
	  
		* Focal child test scores controlling for focal child long term health
		
		eststo fc_health, title(FC test score controlling for fc_lt_health): ///
		  rdrobust2 idx_fc_test_scores bw if ``ga'' & $fc_cond, ///
		  c(1500) h($bw) covs(``controls'' idx_fc_lt_health)
	  
		* Sibling test scores controlling for focal child test scores
		
		eststo fc_ts, title(SIB test scores controlling for fc_test_scores): ///
		  rdrobust2 idx_sib_test_scores bw if ``ga'' & $sib_cond, ///
		  c(1500) h($bw) covs(``controls'' idx_fc_test_scores)
	  
		* Sibling test scores controlling for maternal short-term mental health
		
		eststo mom_st_mh, title(SIB test scores controlling for mom_st_mental_health ): ///
		  rdrobust2 idx_sib_test_scores bw if ``ga'' & $sib_cond, ///
		  c(1500) h($bw) covs(``controls'' idx_mom_st_mental_health)
	  
		* Sibling test scores controlling for focal child test scores
		
		eststo fc_ts_mom_st_mh, title(SIB test scores controlling for fc_test_scores+mom_st_mental_health ): ///
		  rdrobust2 idx_sib_test_scores bw ///
		  if ``ga'' & $sib_cond, c(1500) h($bw) covs(``controls'' idx_fc_test_scores idx_mom_st_mental_health)
	  
		* Sibling test scores for siblings born more than 5 years after focal child
		
		eststo sib5, title(SIB test scores if sib_birthyear>5): ///
		  rdrobust2 idx_sib_test_scores bw if ``ga'' & $sib_cond & ///
		  sib_birthyear > birthyear + 5, c(1500) h($bw) covs(``controls'')
	  
		* Sibling test scores for siblings born more than 5 years after focal child, controlling for focal child test scores
		
		eststo sib5_fc_test, title(SIB test scores if sib_birthyear>5 controlling for fc_test_score): ///
		  rdrobust2 idx_sib_test_scores bw if ``ga'' & $sib_cond & ///
		  sib_birthyear > birthyear + 5, c(1500) h($bw) covs(``controls'' idx_fc_test_scores)
	  
		* Sibling test scores for siblings born 1-5 years after focal child
		
		eststo sib14, title(SIB test scores if 1<=sib_birthyear<=5): ///
		  rdrobust2 idx_sib_test_scores bw if ``ga'' & $sib_cond & ///
		  inrange(sib_birthyear, birthyear, birthyear + 5), c(1500) h($bw) covs(``controls'')
	  
		* Sibling test scores for siblings born 1-5 years after focal child, controlling for focal child test scores
		
		eststo sib14_fc_test, title(SIB test scores if 1<=sib_birthyear<=5 controlling for fc_test_score): ///
		  rdrobust2 idx_sib_test_scores bw if ``ga'' & ///
		  $sib_cond & inrange(sib_birthyear, birthyear, birthyear + 5), c(1500) h($bw) covs(``controls'' idx_fc_test_scores)
	}
  	
	esttab using "$resdir/mecs_control_outcomes.csv", scsv keep(RD_Estimate) coeflabels(RD_Estimate VLBW) ///
		cells(b(fmt(3) star pvalue(pv_robust)) tau(par([ ]) fmt(3)) se(par fmt(3))) ///
		star(* 0.10 ** 0.05 *** 0.01) nogaps noobs nonotes compress ///
		scalars("mean_right Mean outcome" "N Observations") sfmt(%6.3f %6.0fc) `a'
  local a "append nomtitles nonumbers nodepvars nolines collabels(none)"
	
	log close

end



*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*
*  RUN EVERYTHING
*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*~*

* dataprep_initial
* dataprep_death_dates
* dataprep_stillborn
* dataprep_academic_achievement
* dataprep_fc_hosp
* dataprep_fc_diagnosis
* dataprep_siblings
* dataprep_add_vars
* dataprep_create_indices
* prelim_bandwidth
* prelim_mccrary
* prelim_figures
* prelim_figures_parents
* prelim_covtests
* prelim_covtests_multinf
* prelim_figures_index
* prelim_sumstat_fc
* prelim_sumstat_bw
* results_baseline_children
* results_baseline_parents
* results_baseline_index
* results_gaps
* robust_bw_poly
* robust_additional
* robust_cutoffs
* mecs_quantile
* mecs_het
* mecs_het_endowments
* mecs_control_outcomes
